git.ipfire.org Git - thirdparty/ccache.git/commitdiff
Unify comments when hashing source code to increase hit rate
author Joel Rosdahl <joel@rosdahl.net>
Mon, 14 Dec 2009 21:31:28 +0000 (22:31 +0100)
committer Joel Rosdahl <joel@rosdahl.net>
Tue, 5 Jan 2010 17:53:04 +0000 (18:53 +0100)
Makefile.in
ccache.c
comments.c [new file with mode: 0644]
comments.h [new file with mode: 0644]
manifest.c
test.sh

diff --git a/Makefile.in b/Makefile.in
index 03d569da649e125d460c3ce23f39d985ca621428..5aaedcd6ab36f0bc6f23b953677393a0a8c53c27 100644 (file)
--- a/Makefile.in
+++ b/Makefile.in
@@ -19,11 +19,11 @@ libs = @LIBS@ -lm
 sources = \
     ccache.c mdfour.c hash.c execute.c util.c args.c stats.c \
     cleanup.c snprintf.c unify.c manifest.c hashtable.c hashtable_itr.c \
-    murmurhashneutral2.c hashutil.c
+    murmurhashneutral2.c hashutil.c comments.c
 
 headers = \
     ccache.h hashtable.h hashtable_itr.h hashtable_private.h hashutil.h \
-    manifest.h mdfour.h murmurhashneutral2.h
+    manifest.h mdfour.h murmurhashneutral2.h comments.h
 
 objs = $(sources:.c=.o)
 
diff --git a/ccache.c b/ccache.c
index 897bf083b03ba5b86fdd1c7be4a8590c953fb785..29d74c7ffee389593dcbff66420c1f7052db31d9 100644 (file)
--- a/ccache.c
+++ b/ccache.c
@@ -27,6 +27,7 @@
 #include "hashtable_itr.h"
 #include "hashutil.h"
 #include "manifest.h"
+#include "comments.h"
 
 #include <getopt.h>
 
@@ -244,7 +245,7 @@ static void remember_include_file(char *path, size_t path_len)
        struct mdfour fhash;
        struct stat st;
        int fd = -1;
-       int ret;
+       char *data = (char *)-1;
 
        if (!included_files) {
                goto ignore;
@@ -284,19 +285,20 @@ static void remember_include_file(char *path, size_t path_len)
                cc_log("Include file \"%s\" too new\n", path);
                goto failure;
        }
-       hash_start(&fhash);
-       ret = hash_fd(&fhash, fd);
-       if (!ret) {
-               cc_log("Failed hashing include file \"%s\"\n", path);
+       data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+       if (data == (char *)-1) {
+               cc_log("Failed to mmap %s\n", path);
                goto failure;
        }
 
-       /* Hashing OK. */
+       hash_start(&fhash);
+       hash_string_ignoring_comments(&fhash, data, st.st_size);
+
        h = x_malloc(sizeof(*h));
        hash_result_as_bytes(&fhash, h->hash);
        h->size = fhash.totalN;
        hashtable_insert(included_files, path, h);
-       close(fd);
+       munmap(data, st.st_size);
        return;
 
 failure:
@@ -307,6 +309,9 @@ failure:
        /* Fall through. */
 ignore:
        free(path);
+       if (data != (char *)-1) {
+               munmap(data, st.st_size);
+       }
        if (fd != -1) {
                close(fd);
        }
@@ -786,7 +791,7 @@ static int find_hash(ARGS *args, enum findhash_call_mode mode)
 
        switch (mode) {
        case FINDHASH_DIRECT_MODE:
-               if (!hash_file(&hash, input_file)) {
+               if (!hash_file_ignoring_comments(&hash, input_file)) {
                        cc_log("Failed hashing %s\n", input_file);
                        failed();
                }
@@ -980,7 +985,8 @@ static void from_cache(enum fromcache_call_mode mode, int put_object_in_manifest
        /* Create or update the manifest file. */
        if (put_object_in_manifest && included_files) {
                if (manifest_put(manifest_path, object_hash, included_files)) {
-                       cc_log("Added object file hash to manifest\n");
+                       cc_log("Added object file hash to manifest %s\n",
+                               manifest_path);
                        /* Update timestamp for LRU cleanup. */
 #ifdef HAVE_UTIMES
                        utimes(manifest_path, NULL);
diff --git a/comments.c b/comments.c
new file mode 100644 (file)
index 0000000..daf9d8f
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) Joel Rosdahl 2009
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "ccache.h"
+#include "comments.h"
+
+/*
+ * Append one byte to the local staging buffer and flush the buffer into the
+ * hash whenever it becomes full. Only usable inside
+ * hash_string_ignoring_comments() since it refers to that function's locals
+ * (hash, hashbuf and hashbuflen).
+ */
+#define HASH(ch)                                                       \
+       do {                                                            \
+               hashbuf[hashbuflen] = (ch);                             \
+               hashbuflen++;                                           \
+               if (hashbuflen == sizeof(hashbuf)) {                    \
+                       hash_buffer(hash, hashbuf, sizeof(hashbuf));    \
+                       hashbuflen = 0;                                 \
+               }                                                       \
+       } while (0)
+
+/*
+ * Add the len bytes starting at str to hash, leaving out C and C++ comments.
+ *
+ * - A block comment is replaced by a single space, and the newlines inside it
+ *   are kept, so tokens are not pasted together and line numbers stay the
+ *   same. An unterminated block comment swallows the rest of the input.
+ * - A line comment is dropped up to, but not including, the newline that ends
+ *   it. A newline preceded by a backslash does not end the comment.
+ * - String literals and character constants are hashed verbatim, so
+ *   comment-like text inside them still affects the hash. Backslash escapes
+ *   are consumed as pairs, which makes "\\" and '\'' terminate correctly.
+ */
+void hash_string_ignoring_comments(
+       struct mdfour *hash, const char *str, size_t len)
+{
+       const char *p;
+       const char *end;
+       char hashbuf[64];
+       size_t hashbuflen = 0;
+
+       p = str;
+       end = str + len;
+       while (p < end) {
+               switch (*p) {
+               case '/':
+                       if (p+1 == end) {
+                               break;
+                       }
+                       switch (*(p+1)) {
+                       case '*':
+                               HASH(' '); /* Don't paste tokens together when
+                                           * removing the comment. */
+                               p += 2;
+                               while (end - p >= 2
+                                      && (*p != '*' || *(p+1) != '/')) {
+                                       if (*p == '\n') {
+                                               /* Keep line numbers. */
+                                               HASH('\n');
+                                       }
+                                       p++;
+                               }
+                               if (end - p < 2) {
+                                       /* Unterminated comment; never move p
+                                        * beyond end. */
+                                       goto end;
+                               }
+                               p += 2;
+                               continue;
+
+                       case '/':
+                               p += 2;
+                               while (p < end
+                                      && (*p != '\n' || *(p-1) == '\\')) {
+                                       p++;
+                               }
+                               continue;
+
+                       default:
+                               break;
+                       }
+                       break;
+
+               case '"':
+                       HASH(*p);
+                       p++;
+                       while (p < end && *p != '"') {
+                               if (*p == '\\' && p+1 < end) {
+                                       /* Hash the backslash together with
+                                        * the character it escapes so that
+                                        * neither \" nor \\ is mistaken for
+                                        * the end of the string. */
+                                       HASH(*p);
+                                       p++;
+                               }
+                               HASH(*p);
+                               p++;
+                       }
+                       if (p == end) {
+                               goto end;
+                       }
+                       break;
+
+               case '\'':
+                       /* Character constant, e.g. '"' or '\''. Without this
+                        * case a double quote inside a character constant
+                        * would be taken for the start of a string. The scan
+                        * also stops at an unescaped newline so that a stray
+                        * apostrophe only affects its own line. */
+                       HASH(*p);
+                       p++;
+                       while (p < end && *p != '\'' && *p != '\n') {
+                               if (*p == '\\' && p+1 < end) {
+                                       HASH(*p);
+                                       p++;
+                               }
+                               HASH(*p);
+                               p++;
+                       }
+                       if (p == end) {
+                               goto end;
+                       }
+                       break;
+
+               default:
+                       break;
+               }
+
+               /* Hash the current character: ordinary code, a lone '/', or
+                * the character that ended a literal. */
+               HASH(*p);
+               p++;
+       }
+
+end:
+       /* Flush whatever is left in the staging buffer. */
+       hash_buffer(hash, hashbuf, hashbuflen);
+}
+
+/*
+ * Add contents of a file to a hash, but don't hash comments. Returns 1 on
+ * success, otherwise 0.
+ *
+ * The file at path is mapped read-only and passed to
+ * hash_string_ignoring_comments(). An empty file counts as success and adds
+ * nothing to the hash.
+ */
+int hash_file_ignoring_comments(struct mdfour *hash, const char *path)
+{
+       int fd;
+       struct stat st;
+       char *data;
+
+       fd = open(path, O_RDONLY);
+       if (fd == -1) {
+               return 0;
+       }
+       if (fstat(fd, &st) == -1) {
+               close(fd);
+               return 0;
+       }
+       if (st.st_size == 0) {
+               /* mmap() fails for a zero-length mapping, and there is
+                * nothing to hash anyway. */
+               close(fd);
+               return 1;
+       }
+       data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+       /* The mapping stays valid after the descriptor is closed. */
+       close(fd);
+       if (data == MAP_FAILED) {
+               return 0;
+       }
+
+       hash_string_ignoring_comments(hash, data, st.st_size);
+
+       munmap(data, st.st_size);
+       return 1;
+}
diff --git a/comments.h b/comments.h
new file mode 100644 (file)
index 0000000..bbc3f06
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef COMMENTS_H
+#define COMMENTS_H
+
+#include "mdfour.h"
+
+/*
+ * Add the len bytes starting at str to hash, leaving out C and C++ comments.
+ */
+void hash_string_ignoring_comments(struct mdfour *hash, const char *str,
+                                   size_t len);
+
+/*
+ * Add the contents of the file at path to hash, leaving out comments.
+ * Returns 1 on success, otherwise 0.
+ */
+int hash_file_ignoring_comments(struct mdfour *hash, const char *path);
+
+#endif /* COMMENTS_H */
diff --git a/manifest.c b/manifest.c
index 9a17113341a705e8ff08eb9576e003d5f76f6fa1..31f5b6be474c5945cd3895b555addb36850fd1ad 100644 (file)
--- a/manifest.c
+++ b/manifest.c
@@ -29,6 +29,7 @@
 #include "hashutil.h"
 #include "manifest.h"
 #include "murmurhashneutral2.h"
+#include "comments.h"
 
 extern char *temp_dir;
 
@@ -345,7 +346,8 @@ static int verify_object(struct manifest *mf, struct object *obj,
                if (!actual) {
                        actual = x_malloc(sizeof(*actual));
                        hash_start(&hash);
-                       if (!hash_file(&hash, mf->files[fi->index])) {
+                       if (!hash_file_ignoring_comments(
+                                   &hash, mf->files[fi->index])) {
                                cc_log("Failed hashing %s\n",
                                       mf->files[fi->index]);
                                free(actual);
diff --git a/test.sh b/test.sh
index e8cf31e41b8ff4a162edcfe4e1a7fe6a39c4c22a..d35b3addbeb551b67cf58370e48aae4267e78c8e 100755 (executable)
--- a/test.sh
+++ b/test.sh
@@ -442,6 +442,7 @@ EOF
     ##################################################################
     # Check that -Wp,-MMD,file.d works.
     testname="-Wp,-MMD"
+    $CCACHE -C >/dev/null
     $CCACHE -z >/dev/null
     $CCACHE $COMPILER -c -Wp,-MMD,other.d test.c
     checkstat 'cache hit (direct)' 0
@@ -530,6 +531,7 @@ EOF
     ##################################################################
     # Check that -MF works.
     testname="-MF"
+    $CCACHE -C >/dev/null
     $CCACHE -z >/dev/null
     $CCACHE $COMPILER -c -MD -MF other.d test.c
     checkstat 'cache hit (direct)' 0
@@ -603,6 +605,45 @@ EOF
     checkstat 'cache miss' 1
     checkfile stderr-mf.txt "`cat stderr-orig.txt`"
 
+    ##################################################################
+    # Check that changes in comments are ignored when hashing.
+    testname="changes in comments"
+    $CCACHE -C >/dev/null
+    $CCACHE -z >/dev/null
+    cat <<EOF >comments.h
+/*
+ * /* foo comment
+ */
+EOF
+    cat <<'EOF' >comments.c
+#include "comments.h"
+char test[] = "\
+/* apple */ // banana"; // foo comment
+EOF
+    sleep 1 # Sleep to make the include file trusted.
+
+    $CCACHE $COMPILER -c comments.c
+    checkstat 'cache hit (direct)' 0
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 1
+
+    sed -i 's/foo/ignored/' comments.h comments.c
+    sleep 1 # Sleep to make the include file trusted.
+
+    $CCACHE $COMPILER -c comments.c
+    checkstat 'cache hit (direct)' 1
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 1
+
+    # Check that comment-like string contents are hashed.
+    sed -i 's/apple/orange/' comments.c
+    sleep 1 # Sleep to make the include file trusted.
+
+    $CCACHE $COMPILER -c comments.c
+    checkstat 'cache hit (direct)' 1
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 2
+
     ##################################################################
     # Reset things.
     CCACHE_NODIRECT=1