From: Joel Rosdahl <joel@rosdahl.net>
Date: Wed, 5 May 2010 20:16:28 +0000 (+0200)
Subject: Add a type parameter to hash_delimiter to tag hashed information
X-Git-Tag: v3.0pre1~29
X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=11a8b1439a90b70460e517aa092179f50d4d93ed;p=thirdparty%2Fccache.git

Add a type parameter to hash_delimiter to tag hashed information
---

diff --git a/NEWS.txt b/NEWS.txt
index f857dca21..76baf1600 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -120,6 +120,10 @@ Bug fixes
 
     - Fixed NFS issues.
 
+    - Computation of the hash sum has been improved to decrease the risk of
+      hash collisions. For instance, the compiler arguments `-X -Y` and `-X-Y`
+      previously resulted in the same hash sum.
+
     - Bail out on too hard compiler options `--coverage`, `-fprofile-arcs`,
       `-fprofile-generate`, `-fprofile-use`, `-ftest-coverage` and
       `-save-temps`. Also bail out on `@file` style options.
diff --git a/ccache.c b/ccache.c
index 727d9b8ff..87bece7ab 100644
--- a/ccache.c
+++ b/ccache.c
@@ -701,12 +701,21 @@ get_object_name_from_cpp(ARGS *args, struct mdfour *hash)
 	   as it gives the wrong line numbers for warnings. Pity.
 	*/
 	if (!enable_unify) {
+		hash_delimiter(hash, "cpp");
 		if (!process_preprocessed_file(hash, path_stdout)) {
 			stats_update(STATS_ERROR);
 			unlink(path_stderr);
 			failed();
 		}
 	} else {
+		/*
+		 * When we are doing the unifying tricks we need to include the
+		 * input file name in the hash to get the warnings right.
+		 */
+		hash_delimiter(hash, "unifyfilename");
+		hash_string(hash, input_file);
+
+		hash_delimiter(hash, "unifycpp");
 		if (unify_hash(hash, path_stdout) != 0) {
 			stats_update(STATS_ERROR);
 			unlink(path_stderr);
@@ -715,10 +724,10 @@ get_object_name_from_cpp(ARGS *args, struct mdfour *hash)
 		}
 	}
 
+	hash_delimiter(hash, "cppstderr");
 	if (!hash_file(hash, path_stderr)) {
 		fatal("Failed to open %s", path_stderr);
 	}
-	hash_delimiter(hash);
 
 	i_tmpfile = path_stdout;
 
@@ -764,23 +773,13 @@ static void calculate_common_hash(ARGS *args, struct mdfour *hash)
 	char *p;
 
 	hash_string(hash, HASH_PREFIX);
-	hash_delimiter(hash);
-
-	/*
-	 * When we are doing the unifying tricks we need to include the input
-	 * file name in the hash to get the warnings right.
-	 */
-	if (enable_unify) {
-		hash_string(hash, input_file);
-	}
-	hash_delimiter(hash);
 
 	/*
 	 * We have to hash the extension, as a .i file isn't treated the same
 	 * by the compiler as a .ii file.
 	 */
+	hash_delimiter(hash, "ext");
 	hash_string(hash, i_extension);
-	hash_delimiter(hash);
 
 	if (stat(args->argv[0], &st) != 0) {
 		cc_log("Couldn't stat the compiler (%s)", args->argv[0]);
@@ -798,29 +797,30 @@ static void calculate_common_hash(ARGS *args, struct mdfour *hash)
 	if (strcmp(compilercheck, "none") == 0) {
 		/* Do nothing. */
 	} else if (strcmp(compilercheck, "content") == 0) {
+		hash_delimiter(hash, "cc_content");
 		hash_file(hash, args->argv[0]);
 	} else { /* mtime */
+		hash_delimiter(hash, "cc_mtime");
 		hash_int(hash, st.st_size);
 		hash_int(hash, st.st_mtime);
 	}
-	hash_delimiter(hash);
 
 	/*
 	 * Also hash the compiler name as some compilers use hard links and
 	 * behave differently depending on the real name.
 	 */
+	hash_delimiter(hash, "cc_name");
 	hash_string(hash, basename(args->argv[0]));
-	hash_delimiter(hash);
 
 	/* Possibly hash the current working directory. */
 	if (getenv("CCACHE_HASHDIR")) {
 		char *cwd = gnu_getcwd();
 		if (cwd) {
+			hash_delimiter(hash, "cwd");
 			hash_string(hash, cwd);
 			free(cwd);
 		}
 	}
-	hash_delimiter(hash);
 
 	p = getenv("CCACHE_EXTRAFILES");
 	if (p) {
@@ -829,11 +829,11 @@ static void calculate_common_hash(ARGS *args, struct mdfour *hash)
 		q = p;
 		while ((path = strtok(q, " \t\r\n"))) {
 			cc_log("Hashing extra file %s", path);
+			hash_delimiter(hash, "extrafile");
 			if (!hash_file(hash, path)) {
 				stats_update(STATS_BADEXTRAFILE);
 				failed();
 			}
-			hash_delimiter(hash);
 			q = NULL;
 		}
 		free(p);
@@ -899,16 +899,16 @@ static struct file_hash *calculate_object_hash(
 		    stat(args->argv[i] + 8, &st) == 0) {
 			/* If given a explicit specs file, then hash that file,
 			   but don't include the path to it in the hash. */
+			hash_delimiter(hash, "specs");
 			if (!hash_file(hash, args->argv[i] + 8)) {
 				failed();
 			}
-			hash_delimiter(hash);
 			continue;
 		}
 
 		/* All other arguments are included in the hash. */
+		hash_delimiter(hash, "arg");
 		hash_string(hash, args->argv[i]);
-		hash_delimiter(hash);
 	}
 
 	if (direct_mode) {
@@ -917,9 +917,10 @@ static struct file_hash *calculate_object_hash(
 		 * __FILE__, so make sure that the hash is unique for the file
 		 * name.
 		 */
+		hash_delimiter(hash, "inputfile");
 		hash_string(hash, input_file);
-		hash_delimiter(hash);
 
+		hash_delimiter(hash, "sourcecode");
 		result = hash_source_code_file(hash, input_file);
 		if (result & HASH_SOURCE_CODE_ERROR) {
 			failed();
diff --git a/ccache.h b/ccache.h
index 2902bd9c7..f514057d2 100644
--- a/ccache.h
+++ b/ccache.h
@@ -55,7 +55,7 @@ enum stats {
 };
 
 void hash_start(struct mdfour *md);
-void hash_delimiter(struct mdfour *md);
+void hash_delimiter(struct mdfour *md, const char* type);
 void hash_string(struct mdfour *md, const char *s);
 void hash_int(struct mdfour *md, int x);
 int hash_fd(struct mdfour *md, int fd);
diff --git a/hash.c b/hash.c
index 2a468d508..530cc7d9c 100644
--- a/hash.c
+++ b/hash.c
@@ -26,7 +26,7 @@
 #include <string.h>
 #include <unistd.h>
 
-#define HASH_DELIMITER "\000cCaChE\000"
+#define HASH_DELIMITER "\000cCaChE"
 
 void hash_buffer(struct mdfour *md, const void *s, size_t len)
 {
@@ -38,10 +38,20 @@ void hash_start(struct mdfour *md)
 	mdfour_begin(md);
 }
 
-void hash_delimiter(struct mdfour *md)
+/*
+ * Hash some data that is unlikely to occur in the input. The idea is twofold:
+ *
+ * - Delimit things like arguments from each other (e.g., so that -I -O2 and
+ *   -I-O2 hash differently).
+ * - Tag different types of hashed information so that it's possible to do
+ *   conditional hashing of information in a safe way (e.g., if we want to hash
+ *   information X if CCACHE_A is set and information Y if CCACHE_B is set,
+ *   there should never be a hash collision risk).
+ */
+void hash_delimiter(struct mdfour *md, const char *type)
 {
-	/* Hash some string that is unlikely to occur in the input. */
 	hash_buffer(md, HASH_DELIMITER, sizeof(HASH_DELIMITER));
+	hash_buffer(md, type, strlen(type) + 1); /* Include NUL. */
 }
 
 void hash_string(struct mdfour *md, const char *s)
diff --git a/hashutil.c b/hashutil.c
index b7407ac1b..7d71e76e9 100644
--- a/hashutil.c
+++ b/hashutil.c
@@ -167,7 +167,7 @@ end:
 		cc_log("Found __DATE__ in %s", path);
 		time_t t = time(NULL);
 		struct tm *now = localtime(&t);
-		hash_delimiter(hash);
+		hash_delimiter(hash, "date");
 		hash_buffer(hash, &now->tm_year, sizeof(now->tm_year));
 		hash_buffer(hash, &now->tm_mon, sizeof(now->tm_mon));
 		hash_buffer(hash, &now->tm_mday, sizeof(now->tm_mday));