Add a type parameter to hash_delimiter to tag hashed information

author Joel Rosdahl <joel@rosdahl.net>

Wed, 5 May 2010 20:16:28 +0000 (22:16 +0200)

committer Joel Rosdahl <joel@rosdahl.net>

Wed, 5 May 2010 20:16:28 +0000 (22:16 +0200)
author Joel Rosdahl <joel@rosdahl.net>
Wed, 5 May 2010 20:16:28 +0000 (22:16 +0200)
committer Joel Rosdahl <joel@rosdahl.net>
Wed, 5 May 2010 20:16:28 +0000 (22:16 +0200)
diff --git a/NEWS.txt b/NEWS.txt

index f857dca21db5402db803afee0d000f7f58b1e6ef..76baf160096446bf1f3e26645b7449394c8a3cbd 100644 (file)
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -120,6 +120,10 @@ Bug fixes
  
      - Fixed NFS issues.
  
+    - Computation of the hash sum has been improved to decrease the risk of
+      hash collisions. For instance, the compiler arguments `-X -Y` and `-X-Y`
+      previously contributed equally to the hash sum.
+
      - Bail out on too hard compiler options `--coverage`, `-fprofile-arcs`,
        `-fprofile-generate`, `-fprofile-use`, `-ftest-coverage` and
        `-save-temps`. Also bail out on `@file` style options.
diff --git a/ccache.c b/ccache.c

index 727d9b8ff357b8ede548bc9609f3ff538f511840..87bece7abd329cc4b8fe45e7122bfd697d5694b0 100644 (file)
--- a/ccache.c
+++ b/ccache.c
@@ -701,12 +701,21 @@ get_object_name_from_cpp(ARGS *args, struct mdfour *hash)
            as it gives the wrong line numbers for warnings. Pity.
         */
         if (!enable_unify) {
+               hash_delimiter(hash, "cpp");
                 if (!process_preprocessed_file(hash, path_stdout)) {
                         stats_update(STATS_ERROR);
                         unlink(path_stderr);
                         failed();
                 }
         } else {
+               /*
+                * When we are doing the unifying tricks we need to include the
+                * input file name in the hash to get the warnings right.
+                */
+               hash_delimiter(hash, "unifyfilename");
+               hash_string(hash, input_file);
+
+               hash_delimiter(hash, "unifycpp");
                 if (unify_hash(hash, path_stdout) != 0) {
                         stats_update(STATS_ERROR);
                         unlink(path_stderr);
@@ -715,10 +724,10 @@ get_object_name_from_cpp(ARGS *args, struct mdfour *hash)
                 }
         }
  
+       hash_delimiter(hash, "cppstderr");
         if (!hash_file(hash, path_stderr)) {
                 fatal("Failed to open %s", path_stderr);
         }
-       hash_delimiter(hash);
  
         i_tmpfile = path_stdout;
  
@@ -764,23 +773,13 @@ static void calculate_common_hash(ARGS *args, struct mdfour *hash)
         char *p;
  
         hash_string(hash, HASH_PREFIX);
-       hash_delimiter(hash);
-
-       /*
-        * When we are doing the unifying tricks we need to include the input
-        * file name in the hash to get the warnings right.
-        */
-       if (enable_unify) {
-               hash_string(hash, input_file);
-       }
-       hash_delimiter(hash);
  
         /*
          * We have to hash the extension, as a .i file isn't treated the same
          * by the compiler as a .ii file.
          */
+       hash_delimiter(hash, "ext");
         hash_string(hash, i_extension);
-       hash_delimiter(hash);
  
         if (stat(args->argv[0], &st) != 0) {
                 cc_log("Couldn't stat the compiler (%s)", args->argv[0]);
@@ -798,29 +797,30 @@ static void calculate_common_hash(ARGS *args, struct mdfour *hash)
         if (strcmp(compilercheck, "none") == 0) {
                 /* Do nothing. */
         } else if (strcmp(compilercheck, "content") == 0) {
+               hash_delimiter(hash, "cc_content");
                 hash_file(hash, args->argv[0]);
         } else { /* mtime */
+               hash_delimiter(hash, "cc_mtime");
                 hash_int(hash, st.st_size);
                 hash_int(hash, st.st_mtime);
         }
-       hash_delimiter(hash);
  
         /*
          * Also hash the compiler name as some compilers use hard links and
          * behave differently depending on the real name.
          */
+       hash_delimiter(hash, "cc_name");
         hash_string(hash, basename(args->argv[0]));
-       hash_delimiter(hash);
  
         /* Possibly hash the current working directory. */
         if (getenv("CCACHE_HASHDIR")) {
                 char *cwd = gnu_getcwd();
                 if (cwd) {
+                       hash_delimiter(hash, "cwd");
                         hash_string(hash, cwd);
                         free(cwd);
                 }
         }
-       hash_delimiter(hash);
  
         p = getenv("CCACHE_EXTRAFILES");
         if (p) {
@@ -829,11 +829,11 @@ static void calculate_common_hash(ARGS *args, struct mdfour *hash)
                 q = p;
                 while ((path = strtok(q, " \t\r\n"))) {
                         cc_log("Hashing extra file %s", path);
+                       hash_delimiter(hash, "extrafile");
                         if (!hash_file(hash, path)) {
                                 stats_update(STATS_BADEXTRAFILE);
                                 failed();
                         }
-                       hash_delimiter(hash);
                         q = NULL;
                 }
                 free(p);
@@ -899,16 +899,16 @@ static struct file_hash *calculate_object_hash(
                     stat(args->argv[i] + 8, &st) == 0) {
                         /* If given a explicit specs file, then hash that file,
                            but don't include the path to it in the hash. */
+                       hash_delimiter(hash, "specs");
                         if (!hash_file(hash, args->argv[i] + 8)) {
                                 failed();
                         }
-                       hash_delimiter(hash);
                         continue;
                 }
  
                 /* All other arguments are included in the hash. */
+               hash_delimiter(hash, "arg");
                 hash_string(hash, args->argv[i]);
-               hash_delimiter(hash);
         }
  
         if (direct_mode) {
@@ -917,9 +917,10 @@ static struct file_hash *calculate_object_hash(
                  * __FILE__, so make sure that the hash is unique for the file
                  * name.
                  */
+               hash_delimiter(hash, "inputfile");
                 hash_string(hash, input_file);
-               hash_delimiter(hash);
  
+               hash_delimiter(hash, "sourcecode");
                 result = hash_source_code_file(hash, input_file);
                 if (result & HASH_SOURCE_CODE_ERROR) {
                         failed();
diff --git a/ccache.h b/ccache.h

index 2902bd9c75ed00e05b7e2d1050d95ce418963d83..f514057d24240adbd44be1560097a28b98a9fa12 100644 (file)
--- a/ccache.h
+++ b/ccache.h
@@ -55,7 +55,7 @@ enum stats {
  };
  
  void hash_start(struct mdfour *md);
-void hash_delimiter(struct mdfour *md);
+void hash_delimiter(struct mdfour *md, const char* type);
  void hash_string(struct mdfour *md, const char *s);
  void hash_int(struct mdfour *md, int x);
  int hash_fd(struct mdfour *md, int fd);
diff --git a/hash.c b/hash.c

index 2a468d50815cc07aeb121d7b104298a2d38acf6f..530cc7d9ca020fdf671211404e61bbd016144e6e 100644 (file)
--- a/hash.c
+++ b/hash.c
@@ -26,7 +26,7 @@
  #include <string.h>
  #include <unistd.h>
  
-#define HASH_DELIMITER "\000cCaChE\000"
+#define HASH_DELIMITER "\000cCaChE"
  
  void hash_buffer(struct mdfour *md, const void *s, size_t len)
  {
@@ -38,10 +38,20 @@ void hash_start(struct mdfour *md)
         mdfour_begin(md);
  }
  
-void hash_delimiter(struct mdfour *md)
+/*
+ * Hash some data that is unlikely to occur in the input. The idea is twofold:
+ *
+ * - Delimit things like arguments from each other (e.g., so that -I -O2 and
+ *   -I-O2 hash differently).
+ * - Tag different types of hashed information so that it's possible to do
+ *   conditional hashing of information in a safe way (e.g., if we want to hash
+ *   information X if CCACHE_A is set and information Y if CCACHE_B is set,
+ *   there should never be a hash collision risk).
+ */
+void hash_delimiter(struct mdfour *md, const char *type)
  {
-       /* Hash some string that is unlikely to occur in the input. */
         hash_buffer(md, HASH_DELIMITER, sizeof(HASH_DELIMITER));
+       hash_buffer(md, type, strlen(type) + 1); /* Include NUL. */
  }
  
  void hash_string(struct mdfour *md, const char *s)
diff --git a/hashutil.c b/hashutil.c

index b7407ac1bc8739fda31657663d5c7b676207a688..7d71e76e93ac48faea98174b313d61bc2ad62f4f 100644 (file)
--- a/hashutil.c
+++ b/hashutil.c
@@ -167,7 +167,7 @@ end:
                 cc_log("Found __DATE__ in %s", path);
                 time_t t = time(NULL);
                 struct tm *now = localtime(&t);
-               hash_delimiter(hash);
+               hash_delimiter(hash, "date");
                 hash_buffer(hash, &now->tm_year, sizeof(now->tm_year));
                 hash_buffer(hash, &now->tm_mon, sizeof(now->tm_mon));
                 hash_buffer(hash, &now->tm_mday, sizeof(now->tm_mday));
author	Joel Rosdahl <joel@rosdahl.net>
	Wed, 5 May 2010 20:16:28 +0000 (22:16 +0200)
committer	Joel Rosdahl <joel@rosdahl.net>
	Wed, 5 May 2010 20:16:28 +0000 (22:16 +0200)
NEWS.txt		patch | blob | blame | history
ccache.c		patch | blob | blame | history
ccache.h		patch | blob | blame | history
hash.c		patch | blob | blame | history
hashutil.c		patch | blob | blame | history