First working version of the direct mode

author Joel Rosdahl <joel@rosdahl.net>

Sat, 14 Nov 2009 15:14:55 +0000 (16:14 +0100)

committer Joel Rosdahl <joel@rosdahl.net>

Tue, 5 Jan 2010 17:53:01 +0000 (18:53 +0100)
author Joel Rosdahl <joel@rosdahl.net>
Sat, 14 Nov 2009 15:14:55 +0000 (16:14 +0100)
committer Joel Rosdahl <joel@rosdahl.net>
Tue, 5 Jan 2010 17:53:01 +0000 (18:53 +0100)
diff --git a/Makefile.in b/Makefile.in

index 8fdd5b125423bd55d3dc738163f7e3502c861f8f..641fff478a3dca67241d8fba5769e2edceef97d6 100644 (file)
--- a/Makefile.in
+++ b/Makefile.in
@@ -13,11 +13,12 @@ CFLAGS=$(CPPFLAGS) @CFLAGS@
  LDFLAGS=@LDFLAGS@
  EXEEXT=@EXEEXT@
  
-LIBS= @LIBS@
+libs = @LIBS@ -lm
  
  sources = \
         ccache.c mdfour.c hash.c execute.c util.c args.c stats.c \
-       cleanup.c snprintf.c unify.c
+       cleanup.c snprintf.c unify.c manifest.c hashtable.c hashtable_itr.c \
+       murmurhashneutral2.c hashutil.c
  
  objs = $(sources:.c=.o)
  
@@ -26,7 +27,7 @@ all: ccache$(EXEEXT)
  docs: ccache.1 web/ccache-man.html
  
  ccache$(EXEEXT): $(objs)
-       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) $(libs)
  
  ccache.1: ccache.yo
         -yodl2man -o ccache.1 ccache.yo
diff --git a/NEWS b/NEWS

index bd17b17ff9649fa3c5f03e8c72d2f448b559e548..ab0657e3f482933409184cc797fb74c110127dec 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,13 @@ Changes since ccache 2.4
  
  New features and improvements:
  
+  - Implemented a mode called "direct mode" where ccache computes a hash of the
+    source code and compiler arguments without running the preprocessor. By not
+    running the preprocessor, CPU usage is reduced; the runtime is about
+    0.2-1.0 times that of ccache running in traditional mode. The relative
+    speedup will be higher when I/O is fast (e.g., when files are in the disk
+    cache). The direct mode can be disabled by setting CCACHE_NODIRECT.
+
    - Object files are now by default stored compressed in the cache. The runtime
      cost is negligible, and more files will fit in the ccache directory and in
      the disk cache. CCACHE_NOCOMPRESS can be set to disable object file
diff --git a/ccache.c b/ccache.c

index f644b750c6311ec372c6f3090249be2cc44bd31d..25cd7cb3eec583db44d1d98d928824d2174f6a40 100644 (file)
--- a/ccache.c
+++ b/ccache.c
@@ -5,6 +5,7 @@
  
     Copyright (C) Andrew Tridgell 2002
     Copyright (C) Martin Pool 2003
+   Copyright (C) Joel Rosdahl 2009
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -22,13 +23,18 @@
  */
  
  #include "ccache.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+#include "hashutil.h"
+#include "manifest.h"
+
  #include <getopt.h>
  
  /* the base cache directory */
  char *cache_dir = NULL;
  
  /* the directory for temporary files */
-static char *temp_dir = NULL;
+char *temp_dir = NULL;
  
  /* the debug logfile name, if set */
  char *cache_logfile = NULL;
@@ -45,8 +51,42 @@ static char *output_file;
  /* the source file */
  static char *input_file;
  
-/* the name of the file containing the cached object code */
-static char *hashname;
+/*
+ * the hash of the file containing the cached object code (abcdef[...]-size)
+ */
+struct file_hash *object_hash;
+
+/*
+ * the name of the file containing the cached object code (abcdef[...]-size)
+ */
+static char *object_name;
+
+/*
+ * the full path of the file containing the cached object code
+ * (cachedir/a/b/cdef[...]-size)
+ */
+static char *object_path;
+
+/* the name of the manifest file without the extension (abcdef[...]-size) */
+static char *manifest_name;
+
+/*
+ * the full path of the file containing the manifest
+ * (cachedir/a/b/cdef[...]-size.manifest)
+ */
+static char *manifest_path;
+
+/*
+ * Time of compilation. Used to see if include files have changed after
+ * compilation.
+ */
+static time_t time_of_compilation;
+
+/*
+ * Files included by the preprocessor and their hashes/sizes. Key: file path.
+ * Value: struct file_hash.
+ */
+static struct hashtable *included_files;
  
  /* the extension of the file after pre-processing */
  static const char *i_extension;
@@ -66,6 +106,9 @@ char *stats_file = NULL;
  /* can we safely use the unification hashing backend? */
  static int enable_unify;
  
+/* should we use the direct mode? */
+static int enable_direct = 1;
+
  /* a list of supported file extensions, and the equivalent
     extension for code that has been through the pre-processor
  */
@@ -88,6 +131,17 @@ static struct {
         {"ii", "ii"},
         {NULL, NULL}};
  
+enum fromcache_call_mode {
+       FROMCACHE_DIRECT_MODE,
+       FROMCACHE_CPP_MODE,
+       FROMCACHE_COMPILED_MODE
+};
+
+enum findhash_call_mode {
+       FINDHASH_DIRECT_MODE,
+       FINDHASH_CPP_MODE
+};
+
  /*
    something went badly wrong - just execute the real compiler
  */
@@ -129,29 +183,197 @@ static void failed(void)
         exit(1);
  }
  
+char *format_file_hash(struct file_hash *file_hash)
+{
+       char *ret;
+       int i;
  
-/* return a string to be used to distinguish temporary files
-   this also tries to cope with NFS by adding the local hostname
-*/
-static const char *tmp_string(void)
+       ret = x_malloc(53);
+       for (i = 0; i < 16; i++) {
+               sprintf(&ret[i*2], "%02x", (unsigned)file_hash->hash[i]);
+       }
+       sprintf(&ret[i*2], "-%u", (unsigned)file_hash->size);
+
+       return ret;
+}
+
+/*
+ * Transform a name to a full path into the cache directory, creating
+ * sublevels as needed. Caller frees.
+ */
+static char *get_path_in_cache(const char *name, const char *suffix,
+                               int nlevels)
  {
-       static char *ret;
+       int i;
+       char *path;
+       char *result;
  
-       if (!ret) {
-               char hostname[200];
-               strcpy(hostname, "unknown");
-#if HAVE_GETHOSTNAME
-               gethostname(hostname, sizeof(hostname)-1);
-#endif
-               hostname[sizeof(hostname)-1] = 0;
-               if (asprintf(&ret, "%s.%u", hostname, (unsigned)getpid()) == -1) {
-                       fatal("Could not allocate tmp_string\n");
+       path = x_strdup(cache_dir);
+       for (i = 0; i < nlevels; ++i) {
+               char *p;
+               x_asprintf(&p, "%s/%c", path, name[i]);
+               free(path);
+               path = p;
+               if (create_dir(path) != 0) {
+                       cc_log("failed to create %s\n", path);
+                       failed();
                 }
         }
+       x_asprintf(&result, "%s/%s%s", path, name + nlevels, suffix);
+       free(path);
+       return result;
+}
  
-       return ret;
+/* Takes over ownership of path. */
+static void remember_include_file(char *path, size_t path_len)
+{
+       struct file_hash *h;
+       struct mdfour fhash;
+       struct stat st;
+       int fd = -1;
+       int ret;
+
+       if (!included_files) {
+               goto ignore;
+       }
+
+       if (path_len >= 2 && (path[0] == '<' && path[path_len - 1] == '>')) {
+               /* Typically <built-in> or <command-line>. */
+               goto ignore;
+       }
+
+       if (strcmp(path, input_file) == 0) {
+               /* Don't remember the input file. */
+               goto ignore;
+       }
+
+       if (hashtable_search(included_files, path)) {
+               /* Already known include file. */
+               goto ignore;
+       }
+
+       /* Let's hash the include file. */
+       fd = open(path, O_RDONLY|O_BINARY);
+       if (fd == -1) {
+               cc_log("Failed to open include file \"%s\"\n", path);
+               goto failure;
+       }
+       if (fstat(fd, &st) != 0) {
+               cc_log("Failed to fstat include file \"%s\"\n", path);
+               goto failure;
+       }
+       if (S_ISDIR(st.st_mode)) {
+               cc_log("Ignoring directory %s\n", path);
+               goto ignore;
+       }
+       if (st.st_mtime >= time_of_compilation
+           || st.st_ctime >= time_of_compilation) {
+               cc_log("Include file \"%s\" too new\n", path);
+               goto failure;
+       }
+       hash_start(&fhash);
+       ret = hash_fd(&fhash, fd);
+       if (!ret) {
+               cc_log("Failed hashing include file \"%s\"\n", path);
+               goto failure;
+       }
+
+       /* Hashing OK. */
+       h = x_malloc(sizeof(*h));
+       hash_result_as_bytes(&fhash, h->hash);
+       h->size = fhash.totalN;
+       hashtable_insert(included_files, path, h);
+       close(fd);
+       return;
+
+failure:
+       cc_log("Disabling direct mode\n");
+       enable_direct = 0;
+       hashtable_destroy(included_files, 1);
+       included_files = NULL;
+       /* Fall through. */
+ignore:
+       free(path);
+       if (fd != -1) {
+               close(fd);
+       }
  }
  
+/*
+ * This function reads and hashes a file. While doing this, it also does these
+ * things with preprocessor lines starting with a hash:
+ *
+ * - TODO: Makes include file paths that match CCACHE_RELDIRS relative.
+ * - Stores the paths of included files in the global variable included_files.
+ */
+static void process_preprocessed_file(struct mdfour *hash, const char *path)
+{
+       int fd;
+       char *data;
+       char *p, *q, *end;
+       off_t size;
+       struct stat st;
+
+       fd = open(path, O_RDONLY);
+       if (fd == -1) {
+               cc_log("failed to open %s\n", path);
+               failed();
+       }
+       if (fstat(fd, &st) != 0) {
+               cc_log("failed to fstat %s\n", path);
+               failed();
+       }
+       size = st.st_size;
+       data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+       if (data == (void *)-1) {
+               cc_log("failed to mmap %s\n", path);
+               failed();
+       }
+       close(fd);
+
+       if (enable_direct) {
+               included_files = create_hashtable(1000, hash_from_string,
+                                                 strings_equal);
+       }
+
+       /* Bytes between p and q are pending to be hashed. */
+       end = data + size;
+       p = data;
+       q = data;
+       while (q < end - 1) {
+               if (q[0] == '#' && q[1] == ' ' /* Need to avoid "#pragma"... */
+                   && (q == data || q[-1] == '\n')) {
+                       char *path;
+
+                       while (q < end && *q != '"') {
+                               q++;
+                       }
+                       q++;
+                       if (q >= end) {
+                               failed();
+                       }
+                       /* q points to the beginning of an include file path */
+                       hash_buffer(hash, p, q - p);
+                       p = q;
+                       while (q < end && *q != '"') {
+                               q++;
+                       }
+                       /* p and q span the include file path */
+                       path = x_strndup(p, q - p);
+                       /* TODO: Rewrite path using CCACHE_RELDIRS here. */
+                       hash_string(hash, path);
+                       if (enable_direct) {
+                               remember_include_file(path, q - p);
+                       }
+                       p = q;
+               } else {
+                       q++;
+               }
+       }
+
+       hash_buffer(hash, p, (end - p));
+       munmap(data, size);
+}
  
  /* run the real compiler and put the result in cache */
  static void to_cache(ARGS *args)
@@ -161,9 +383,9 @@ static void to_cache(ARGS *args)
         struct stat st1, st2;
         int status;
  
-       x_asprintf(&tmp_stdout, "%s.tmp.stdout.%s", hashname, tmp_string());
-       x_asprintf(&tmp_stderr, "%s.tmp.stderr.%s", hashname, tmp_string());
-       x_asprintf(&tmp_hashname, "%s.tmp.%s", hashname, tmp_string());
+       x_asprintf(&tmp_stdout, "%s.tmp.stdout.%s", object_path, tmp_string());
+       x_asprintf(&tmp_stderr, "%s.tmp.stderr.%s", object_path, tmp_string());
+       x_asprintf(&tmp_hashname, "%s.tmp.%s", object_path, tmp_string());
  
         args_add(args, "-o");
         args_add(args, tmp_hashname);
@@ -231,11 +453,11 @@ static void to_cache(ARGS *args)
                 failed();
         }
  
-       x_asprintf(&path_stderr, "%s.stderr", hashname);
+       x_asprintf(&path_stderr, "%s.stderr", object_path);
  
         if (stat(tmp_stderr, &st1) != 0 ||
                 stat(tmp_hashname, &st2) != 0 ||
-               move_file(tmp_hashname, hashname) != 0 ||
+               move_file(tmp_hashname, object_path) != 0 ||
                 move_file(tmp_stderr, path_stderr) != 0) {
                 cc_log("failed to rename tmp files - %s\n", strerror(errno));
                 stats_update(STATS_ERROR);
@@ -246,7 +468,7 @@ static void to_cache(ARGS *args)
         /* do an extra stat on the cache files for
            the size statistics */
         if (stat(path_stderr, &st1) != 0 ||
-               stat(hashname, &st2) != 0) {
+               stat(object_path, &st2) != 0) {
                 cc_log("failed to stat cache files - %s\n", strerror(errno));
                 stats_update(STATS_ERROR);
                 failed();
@@ -262,106 +484,18 @@ static void to_cache(ARGS *args)
         free(path_stderr);
  }
  
-/* find the hash for a command. The hash includes all argument lists,
-   plus the output from running the compiler with -E */
-static void find_hash(ARGS *args)
+/*
+ * Find the object file name by running the compiler in preprocessor mode.
+ * Returns the hash as a heap-allocated struct file_hash.
+ */
+static struct file_hash *
+get_object_name_from_cpp(ARGS *args, struct mdfour *hash)
  {
-       int i;
-       char *path_stdout, *path_stderr;
-       char *hash_dir;
-       char *s;
-       struct stat st;
-       int status;
-       int nlevels = 2;
         char *input_base;
         char *tmp;
-       struct mdfour hash;
-
-
-       if ((s = getenv("CCACHE_NLEVELS"))) {
-               nlevels = atoi(s);
-               if (nlevels < 1) nlevels = 1;
-               if (nlevels > 8) nlevels = 8;
-       }
-
-       hash_start(&hash);
-
-       /* when we are doing the unifying tricks we need to include
-           the input file name in the hash to get the warnings right */
-       if (enable_unify) {
-               hash_string(&hash, input_file);
-       }
-
-       /* we have to hash the extension, as a .i file isn't treated the same
-          by the compiler as a .ii file */
-       hash_string(&hash, i_extension);
-
-       /* first the arguments */
-       for (i=1;i<args->argc;i++) {
-               /* some arguments don't contribute to the hash. The
-                  theory is that these arguments will change the
-                  output of -E if they are going to have any effect
-                  at all, or they only affect linking */
-               if (i < args->argc-1) {
-                       if (strcmp(args->argv[i], "-I") == 0 ||
-                           strcmp(args->argv[i], "-include") == 0 ||
-                           strcmp(args->argv[i], "-L") == 0 ||
-                           strcmp(args->argv[i], "-D") == 0 ||
-                           strcmp(args->argv[i], "-idirafter") == 0 ||
-                           strcmp(args->argv[i], "-isystem") == 0) {
-                               i++;
-                               continue;
-                       }
-               }
-               if (strncmp(args->argv[i], "-I", 2) == 0 ||
-                   strncmp(args->argv[i], "-L", 2) == 0 ||
-                   strncmp(args->argv[i], "-D", 2) == 0 ||
-                   strncmp(args->argv[i], "-idirafter", 10) == 0 ||
-                   strncmp(args->argv[i], "-isystem", 8) == 0) {
-                       continue;
-               }
-
-               if (strncmp(args->argv[i], "--specs=", 8) == 0 &&
-                   stat(args->argv[i]+8, &st) == 0) {
-                       /* if given a explicit specs file, then hash that file, but
-                          don't include the path to it in the hash */
-                       hash_file(&hash, args->argv[i]+8);
-                       continue;
-               }
-
-               /* all other arguments are included in the hash */
-               hash_string(&hash, args->argv[i]);
-       }
-
-       /* the compiler driver size and date. This is a simple minded way
-          to try and detect compiler upgrades. It is not 100% reliable */
-       if (stat(args->argv[0], &st) != 0) {
-               cc_log("Couldn't stat the compiler!? (argv[0]='%s')\n", args->argv[0]);
-               stats_update(STATS_COMPILER);
-               failed();
-       }
-
-       /* also include the hash of the compiler name - as some compilers
-          use hard links and behave differently depending on the real name */
-       if (st.st_nlink > 1) {
-               hash_string(&hash, str_basename(args->argv[0]));
-       }
-
-       if (getenv("CCACHE_HASH_COMPILER")) {
-               hash_file(&hash, args->argv[0]);
-       } else if (!getenv("CCACHE_NOHASH_SIZE_MTIME")) {
-               hash_int(&hash, st.st_size);
-               hash_int(&hash, st.st_mtime);
-       }
-
-       /* possibly hash the current working directory */
-       if (getenv("CCACHE_HASHDIR")) {
-               char *cwd = gnu_getcwd();
-               if (cwd) {
-                       hash_string(&hash, cwd);
-                       free(cwd);
-               }
-       }
+       char *path_stdout, *path_stderr;
+       int status;
+       struct file_hash *result;
  
         /* ~/hello.c -> tmp.hello.123.i
            limit the basename to 10
@@ -378,9 +512,11 @@ static void find_hash(ARGS *args)
  
         /* now the run */
         x_asprintf(&path_stdout, "%s/%s.tmp.%s.%s", temp_dir,
-                  input_base, tmp_string(),
-                  i_extension);
-       x_asprintf(&path_stderr, "%s/tmp.cpp_stderr.%s", temp_dir, tmp_string());
+                  input_base, tmp_string(), i_extension);
+       x_asprintf(&path_stderr, "%s/tmp.cpp_stderr.%s", temp_dir,
+                  tmp_string());
+
+       time_of_compilation = time(NULL);
  
         if (!direct_i_file) {
                 /* run cpp on the input file to obtain the .i */
@@ -420,64 +556,183 @@ static void find_hash(ARGS *args)
            as it gives the wrong line numbers for warnings. Pity.
         */
         if (!enable_unify) {
-               hash_file(&hash, path_stdout);
+               process_preprocessed_file(hash, path_stdout);
         } else {
-               if (unify_hash(&hash, path_stdout) != 0) {
+               if (unify_hash(hash, path_stdout) != 0) {
                         stats_update(STATS_ERROR);
                         failed();
                 }
         }
-       hash_file(&hash, path_stderr);
+
+       if (!hash_file(hash, path_stderr)) {
+               fatal("Failed to open %s\n", path_stderr);
+       }
  
         i_tmpfile = path_stdout;
  
         if (!getenv("CCACHE_CPP2")) {
-               /* if we are using the CPP trick then we need to remember this stderr
-                  data and output it just before the main stderr from the compiler
-                  pass */
+               /* if we are using the CPP trick then we need to remember this
+                  stderr data and output it just before the main stderr
+                  from the compiler pass */
                 cpp_stderr = path_stderr;
         } else {
                 unlink(path_stderr);
                 free(path_stderr);
         }
  
-       /* we use a N level subdir for the cache path to reduce the impact
-          on filesystems which are slow for large directories
-       */
-       s = hash_result(&hash);
-       x_asprintf(&hash_dir, "%s/%c", cache_dir, s[0]);
-       x_asprintf(&stats_file, "%s/stats", hash_dir);
-       for (i=1; i<nlevels; i++) {
-               char *p;
-               if (create_dir(hash_dir) != 0) {
-                       cc_log("failed to create %s\n", hash_dir);
-                       failed();
+       result = x_malloc(sizeof(*result));
+       hash_result_as_bytes(hash, result->hash);
+       result->size = hash->totalN;
+       return result;
+}
+
+/* find the hash for a command. The hash includes all argument lists,
+   plus the output from running the compiler with -E */
+static int find_hash(ARGS *args, enum findhash_call_mode mode)
+{
+       int i;
+       char *s;
+       struct stat st;
+       int nlevels = 2;
+       struct mdfour hash;
+
+       if ((s = getenv("CCACHE_NLEVELS"))) {
+               nlevels = atoi(s);
+               if (nlevels < 1) nlevels = 1;
+               if (nlevels > 8) nlevels = 8;
+       }
+
+       hash_start(&hash);
+
+       /* when we are doing the unifying tricks we need to include
+          the input file name in the hash to get the warnings right */
+       if (enable_unify) {
+               hash_string(&hash, input_file);
+       }
+
+       /* we have to hash the extension, as a .i file isn't treated the same
+          by the compiler as a .ii file */
+       hash_string(&hash, i_extension);
+
+       /* first the arguments */
+       for (i=1;i<args->argc;i++) {
+               /* -L doesn't affect compilation. */
+               if (i < args->argc-1 && strcmp(args->argv[i], "-L") == 0) {
+                       i++;
+                       continue;
+               }
+               if (strncmp(args->argv[i], "-L", 2) == 0) {
+                       continue;
+               }
+
+               if (mode == FINDHASH_CPP_MODE) {
+                       /* When using the preprocessor, some arguments don't
+                          contribute to the hash. The theory is that these
+                          arguments will change the output of -E if they are
+                          going to have any effect at all. */
+                       if (i < args->argc-1) {
+                               if (strcmp(args->argv[i], "-I") == 0 ||
+                                   strcmp(args->argv[i], "-include") == 0 ||
+                                   strcmp(args->argv[i], "-D") == 0 ||
+                                   strcmp(args->argv[i], "-idirafter") == 0 ||
+                                   strcmp(args->argv[i], "-isystem") == 0) {
+                                       i++;
+                                       continue;
+                               }
+                       }
+                       if (strncmp(args->argv[i], "-I", 2) == 0 ||
+                           strncmp(args->argv[i], "-D", 2) == 0 ||
+                           strncmp(args->argv[i], "-idirafter", 10) == 0 ||
+                           strncmp(args->argv[i], "-isystem", 8) == 0) {
+                               continue;
+                       }
+               }
+
+               if (strncmp(args->argv[i], "--specs=", 8) == 0 &&
+                   stat(args->argv[i]+8, &st) == 0) {
+                       /* If given an explicit specs file, then hash that file,
+                          but don't include the path to it in the hash. */
+                       if (!hash_file(&hash, args->argv[i]+8)) {
+                               failed();
+                       }
+                       continue;
                 }
-               x_asprintf(&p, "%s/%c", hash_dir, s[i]);
-               free(hash_dir);
-               hash_dir = p;
+
+               /* All other arguments are included in the hash. */
+               hash_string(&hash, args->argv[i]);
         }
-       if (create_dir(hash_dir) != 0) {
-               cc_log("failed to create %s\n", hash_dir);
+
+       /* The compiler driver size and date. This is a simple minded way
+          to try and detect compiler upgrades. It is not 100% reliable. */
+       if (stat(args->argv[0], &st) != 0) {
+               cc_log("Couldn't stat the compiler!? (argv[0]='%s')\n", args->argv[0]);
+               stats_update(STATS_COMPILER);
                 failed();
         }
-       x_asprintf(&hashname, "%s/%s", hash_dir, s+nlevels);
-       free(hash_dir);
-}
  
+       /* also include the hash of the compiler name - as some compilers
+          use hard links and behave differently depending on the real name */
+       if (st.st_nlink > 1) {
+               hash_string(&hash, str_basename(args->argv[0]));
+       }
+
+       if (getenv("CCACHE_HASH_COMPILER")) {
+               hash_file(&hash, args->argv[0]);
+       } else if (!getenv("CCACHE_NOHASH_SIZE_MTIME")) {
+               hash_int(&hash, st.st_size);
+               hash_int(&hash, st.st_mtime);
+       }
+
+       /* possibly hash the current working directory */
+       if (getenv("CCACHE_HASHDIR")) {
+               char *cwd = gnu_getcwd();
+               if (cwd) {
+                       hash_string(&hash, cwd);
+                       free(cwd);
+               }
+       }
+
+       switch (mode) {
+       case FINDHASH_DIRECT_MODE:
+               if (!hash_file(&hash, input_file)) {
+                       failed();
+               }
+               manifest_name = hash_result(&hash);
+               manifest_path = get_path_in_cache(manifest_name, ".manifest",
+                                                 nlevels);
+               object_hash = manifest_get(manifest_path);
+               if (object_hash) {
+                       cc_log("Direct match\n");
+               } else {
+                       cc_log("No direct match\n");
+                       return 0;
+               }
+               break;
+
+       case FINDHASH_CPP_MODE:
+               object_hash = get_object_name_from_cpp(args, &hash);
+               break;
+       }
+
+       object_name = format_file_hash(object_hash);
+       object_path = get_path_in_cache(object_name, "", nlevels);
+       x_asprintf(&stats_file, "%s/%c/stats", cache_dir, object_name[0]);
+
+       return 1;
+}
  
  /*
     try to return the compile result from cache. If we can return from
     cache then this function exits with the correct status code,
     otherwise it returns */
-static void from_cache(int first)
+static void from_cache(enum fromcache_call_mode mode)
  {
         int fd_stderr, fd_cpp_stderr;
         char *stderr_file;
         int ret;
         struct stat st;
  
-       x_asprintf(&stderr_file, "%s.stderr", hashname);
+       x_asprintf(&stderr_file, "%s.stderr", object_path);
         fd_stderr = open(stderr_file, O_RDONLY | O_BINARY);
         if (fd_stderr == -1) {
                 /* it isn't in cache ... */
@@ -486,7 +741,7 @@ static void from_cache(int first)
         }
  
         /* make sure the output is there too */
-       if (stat(hashname, &st) != 0) {
+       if (stat(object_path, &st) != 0) {
                 close(fd_stderr);
                 unlink(stderr_file);
                 free(stderr_file);
@@ -494,13 +749,12 @@ static void from_cache(int first)
         }
  
         /* the user might be disabling cache hits */
+       if ((mode != FROMCACHE_COMPILED_MODE && getenv("CCACHE_RECACHE"))
  #ifndef ENABLE_ZLIB
         /* if the cache file is compressed we must recache */
-       if ((first && getenv("CCACHE_RECACHE")) ||
-               test_if_compressed(hashname) == 1) {
-#else
-       if (first && getenv("CCACHE_RECACHE")) {
+           || test_if_compressed(object_path)
  #endif
+       ) {
                 close(fd_stderr);
                 unlink(stderr_file);
                 free(stderr_file);
@@ -510,10 +764,10 @@ static void from_cache(int first)
         /* update timestamps for LRU cleanup
            also gives output_file a sensible mtime when hard-linking (for make) */
  #ifdef HAVE_UTIMES
-       utimes(hashname, NULL);
+       utimes(object_path, NULL);
         utimes(stderr_file, NULL);
  #else
-       utime(hashname, NULL);
+       utime(object_path, NULL);
         utime(stderr_file, NULL);
  #endif
  
@@ -523,10 +777,10 @@ static void from_cache(int first)
                 unlink(output_file);
                 /* only make a hardlink if the cache file is uncompressed */
                 if (getenv("CCACHE_HARDLINK") &&
-                       test_if_compressed(hashname) == 0) {
-                       ret = link(hashname, output_file);
+                   test_if_compressed(object_path) == 0) {
+                       ret = link(object_path, output_file);
                 } else {
-                       ret = copy_file(hashname, output_file);
+                       ret = copy_file(object_path, output_file);
                 }
         }
  
@@ -541,10 +795,10 @@ static void from_cache(int first)
         free(stderr_file);
  
         if (ret == -1) {
-               ret = copy_file(hashname, output_file);
+               ret = copy_file(object_path, output_file);
                 if (ret == -1) {
                         cc_log("failed to copy %s -> %s (%s)\n",
-                              hashname, output_file, strerror(errno));
+                              object_path, output_file, strerror(errno));
                         stats_update(STATS_ERROR);
                         failed();
                 }
@@ -573,12 +827,41 @@ static void from_cache(int first)
         copy_fd(fd_stderr, 2);
         close(fd_stderr);
  
-       /* and exit with the right status code */
-       if (first) {
-               cc_log("got cached result for %s\n", output_file);
-               stats_update(STATS_CACHED);
+       /* Create or update the manifest file. */
+       if (enable_direct && mode != FROMCACHE_DIRECT_MODE) {
+               if (manifest_put(manifest_path, object_hash, included_files)) {
+                       cc_log("Added %s (hash: %s) to manifest %s\n",
+                              output_file, object_name, manifest_name);
+                       /* Update timestamp for LRU cleanup. */
+#ifdef HAVE_UTIMES
+                       utimes(manifest_path, NULL);
+#else
+                       utime(manifest_path, NULL);
+#endif
+               } else {
+                       cc_log("Failed to add %s (hash: %s) to the manifest\n",
+                              output_file, object_name);
+               }
+       }
+
+       /* log the cache hit */
+       switch (mode) {
+       case FROMCACHE_DIRECT_MODE:
+               cc_log("Got cached result from manifest for %s\n", output_file);
+               stats_update(STATS_CACHEHIT_DIR);
+               break;
+
+       case FROMCACHE_CPP_MODE:
+               cc_log("Got cached result from preprocessor for %s\n",
+                      output_file);
+               stats_update(STATS_CACHEHIT_CPP);
+               break;
+
+       case FROMCACHE_COMPILED_MODE:
+               break;
         }
  
+       /* and exit with the right status code */
         exit(0);
  }
  
@@ -935,14 +1218,28 @@ static void ccache(int argc, char *argv[])
                 enable_unify = 1;
         }
  
-       /* process argument list, returning a new set of arguments for pre-processing */
+       if (getenv("CCACHE_NODIRECT") || enable_unify) {
+               enable_direct = 0;
+       }
+
+       /* process argument list, returning a new set of arguments for
+          pre-processing */
         process_args(orig_args->argc, orig_args->argv);
  
-       /* run with -E to find the hash */
-       find_hash(stripped_args);
+       /* try to find the hash using the manifest */
+       if (enable_direct && find_hash(stripped_args, FINDHASH_DIRECT_MODE)) {
+               /* if we can return from cache at this point then do */
+               from_cache(FROMCACHE_DIRECT_MODE);
+       }
+
+       /*
+        * Find the hash using the preprocessed output. Also updates
+        * included_files.
+        */
+       find_hash(stripped_args, FINDHASH_CPP_MODE);
  
         /* if we can return from cache at this point then do */
-       from_cache(1);
+       from_cache(FROMCACHE_CPP_MODE);
  
         if (getenv("CCACHE_READONLY")) {
                 cc_log("read-only set - doing real compile\n");
@@ -963,7 +1260,7 @@ static void ccache(int argc, char *argv[])
         to_cache(stripped_args);
  
         /* return from cache */
-       from_cache(0);
+       from_cache(FROMCACHE_COMPILED_MODE);
  
         /* oh oh! */
         cc_log("secondary from_cache failed!\n");
diff --git a/ccache.h b/ccache.h

index 9b5f86c49f4c6efb0268ee302948c8163941a21a..08c5fdc714d85c2626f67bd708dd17857bef1d49 100644 (file)
--- a/ccache.h
+++ b/ccache.h
@@ -1,3 +1,6 @@
+#ifndef CCACHE_H
+#define CCACHE_H
+
  #define CCACHE_VERSION "2.4"
  
  #include "config.h"
@@ -56,6 +59,7 @@
  #define COPY_TO_CACHE 2
  #endif
  
+/* statistics fields in storage order */
  enum stats {
         STATS_NONE=0,
         STATS_STDOUT,
@@ -65,7 +69,7 @@ enum stats {
         STATS_PREPROCESSOR,
         STATS_COMPILER,
         STATS_MISSING,
-       STATS_CACHED,
+       STATS_CACHEHIT_CPP,
         STATS_ARGS,
         STATS_LINK,
         STATS_NUMFILES,
@@ -79,6 +83,7 @@ enum stats {
         STATS_CONFTEST,
         STATS_UNSUPPORTED,
         STATS_OUTSTDOUT,
+       STATS_CACHEHIT_DIR,
  
         STATS_END
  };
@@ -90,8 +95,10 @@ typedef unsigned uint32;
  void hash_start(struct mdfour *md);
  void hash_string(struct mdfour *md, const char *s);
  void hash_int(struct mdfour *md, int x);
-void hash_file(struct mdfour *md, const char *fname);
+int hash_fd(struct mdfour *md, int fd);
+int hash_file(struct mdfour *md, const char *fname);
  char *hash_result(struct mdfour *md);
+void hash_result_as_bytes(struct mdfour *md, unsigned char *out);
  void hash_buffer(struct mdfour *md, const char *s, int len);
  
  void cc_log(const char *format, ...) ATTR_FORMAT(printf, 1, 2);
@@ -103,15 +110,19 @@ int move_file(const char *src, const char *dest);
  int test_if_compressed(const char *filename);
  
  int create_dir(const char *dir);
+const char *tmp_string(void);
+int create_hash_dir(char **dir, const char *hash, const char *cache_dir);
  int create_cachedirtag(const char *dir);
  void x_asprintf(char **ptr, const char *format, ...) ATTR_FORMAT(printf, 2, 3);
  char *x_strdup(const char *s);
+char *x_strndup(const char *s, size_t n);
  void *x_realloc(void *ptr, size_t size);
  void *x_malloc(size_t size);
  void traverse(const char *dir, void (*fn)(const char *, struct stat *));
  char *str_basename(const char *s);
  char *dirname(char *s);
-int lock_fd(int fd);
+int read_lock_fd(int fd);
+int write_lock_fd(int fd);
  size_t file_size(struct stat *st);
  int safe_open(const char *fname);
  char *x_realpath(const char *path);
@@ -180,3 +191,5 @@ typedef int (*COMPAR_FN_T)(const void *, const void *);
  #ifdef __CYGWIN__
  #undef HAVE_MKSTEMP
  #endif
+
+#endif /* ifndef CCACHE_H */
diff --git a/ccache.yo b/ccache.yo

index 30115d3a0d9eed4f9e72bdfcac6b71d973756000..6f3e766f3237dc4c5e48e53cdb9d5f75cd8b7b24 100644 (file)
--- a/ccache.yo
+++ b/ccache.yo
@@ -177,6 +177,9 @@ CCACHE_NOCOMPRESS then there is no compression used on files that go
  into the cache. However, this setting has no effect on how files are
  retrieved from the cache, compressed results will still be usable.
  
+dit(bf(CCACHE_NODIRECT)) If you set the environment variable
+CCACHE_NODIRECT then ccache will not use the direct mode.
+
  dit(bf(CCACHE_NOSTATS)) If you set the environment variable
  CCACHE_NOSTATS then ccache will not update the statistics files on
  each compile.
@@ -236,7 +239,8 @@ reformatting only. Note that using CCACHE_UNIFY changes the hash, so
  cached compiles with CCACHE_UNIFY set cannot be used when
  CCACHE_UNIFY is not set and vice versa. The reason the unifier is off
  by default is that it can give incorrect line number information in
-compiler warning messages.
+compiler warning messages. Enabling the unifier implies turning off
+the direct mode.
  
  dit(bf(CCACHE_EXTENSION)) Normally ccache tries to automatically
  determine the extension to use for intermediate C pre-processor files
diff --git a/dump-manifest b/dump-manifest

new file mode 100755 (executable)

index 0000000..e97de26
--- /dev/null
+++ b/dump-manifest
@@ -0,0 +1,54 @@
+#! /usr/bin/env python
+
+# Print the contents of a manifest file in human-readable form.
+#
+# Usage: dump-manifest MANIFEST_FILE
+#
+# The file is read sequentially: a 4-byte magic string, a 16-bit version,
+# a list of NUL-terminated file paths, a list of file infos and a list of
+# objects. All integers are unpacked in network (big-endian) byte order.
+
+import sys
+from struct import calcsize, unpack
+
+# The manifest is binary data; open it in binary mode so that no newline
+# translation is applied on platforms that distinguish text from binary files.
+f = open(sys.argv[1], "rb")
+
+def get_fixstr(n):
+    # Read a fixed-length string of n bytes.
+    return f.read(n)
+
+def get_md4():
+    # Read a 16-byte hash and return it as a hex string.
+    return f.read(16).encode("hex")
+
+def get_str():
+    # Read a NUL-terminated string (the terminator is not included).
+    result = ""
+    while True:
+        ch = f.read(1)
+        if ch == "":
+            # read() returns an empty string at end of file; without this
+            # check a truncated manifest would make the loop spin forever.
+            raise EOFError("unexpected end of file in string")
+        if ch == "\x00":
+            return result
+        result += ch
+
+def get_uint16():
+    # Read an unsigned 16-bit big-endian integer.
+    return unpack("!H", f.read(2))[0]
+
+def get_uint32():
+    # Read an unsigned 32-bit big-endian integer.
+    return unpack("!I", f.read(4))[0]
+
+print "Magic: %s" % get_fixstr(4)
+print "Version: %s" % get_uint16()
+
+print "File paths:"
+n = get_uint16()
+for i in range(n):
+    print "  %d: %s" % (i, get_str())
+
+print "File infos:"
+n = get_uint16()
+for i in range(n):
+    print "  %d:" % i
+    print "    Include path index: %d" % get_uint16()
+    print "    Hash: %s" % get_md4()
+    print "    Size: %d" %  get_uint32()
+
+print "Objects:"
+n = get_uint16()
+for i in range(n):
+    print "  %d:" % i
+    print "    File hash indexes:",
+    # Each object starts with a count followed by that many 16-bit indexes.
+    m = get_uint16()
+    for j in range(m):
+        print get_uint16(),
+    print
+    print "    Hash: %s" % get_md4()
+    print "    Size: %d" % get_uint32()
diff --git a/hash.c b/hash.c

index 4e6552780ef7ad1f16fb85c6991134f4617ba904..89addc4523e58143d6e2621f773005716da6b81b 100644 (file)
--- a/hash.c
+++ b/hash.c
@@ -20,6 +20,7 @@
  */
  
  #include "ccache.h"
+#include <stdio.h>
  
  void hash_buffer(struct mdfour *md, const char *s, int len)
  {
@@ -41,32 +42,51 @@ void hash_int(struct mdfour *md, int x)
         hash_buffer(md, (char *)&x, sizeof(x));
  }
  
-/* add contents of a file to the hash */
-void hash_file(struct mdfour *md, const char *fname)
+/*
+ * Add contents of an open file to the hash. Returns 1 on success, otherwise 0.
+ *
+ * The descriptor is read in sizeof(buf)-sized chunks until read() returns a
+ * non-positive value. The descriptor is not closed here.
+ */
+int hash_fd(struct mdfour *md, int fd)
  {
         char buf[1024];
-       int fd, n;
+       int n;
+
+       while ((n = read(fd, buf, sizeof(buf))) > 0) {
+               hash_buffer(md, buf, n);
+       }
+       /* n == 0 means end of file; a negative n means read() failed */
+       if (n == 0) {
+               return 1;
+       } else {
+               return 0;
+       }
+}
+
+/*
+ * Add contents of a file to the hash. Returns 1 on success, otherwise 0.
+ *
+ * A file that cannot be opened is reported as failure (0); otherwise the
+ * result of hash_fd() on the opened file is returned.
+ */
+int hash_file(struct mdfour *md, const char *fname)
+{
+       int fd;
+       int ret;
  
         fd = open(fname, O_RDONLY|O_BINARY);
         if (fd == -1) {
-               fatal("Failed to open %s\n", fname);
+               return 0;
         }
  
-       while ((n = read(fd, buf, sizeof(buf))) > 0) {
-               hash_buffer(md, buf, n);
-       }
+       ret = hash_fd(md, fd);
         close(fd);
+       return ret;
  }
  
-/* return the hash result as a static string */
+/* return the hash result as a hex string */
  char *hash_result(struct mdfour *md)
  {
         unsigned char sum[16];
-       static char ret[53];
+       char *ret;
         int i;
  
-       hash_buffer(md, NULL, 0);
-       mdfour_result(md, sum);
+       ret = x_malloc(53);
+       hash_result_as_bytes(md, sum);
  
         for (i=0;i<16;i++) {
                 sprintf(&ret[i*2], "%02x", (unsigned)sum[i]);
@@ -75,3 +95,10 @@ char *hash_result(struct mdfour *md)
  
         return ret;
  }
+
+/*
+ * Return the hash result as 16 binary bytes, written to out.
+ *
+ * NOTE(review): out presumably needs room for at least 16 bytes, and the
+ * zero-length hash_buffer() call looks like a flush of pending input before
+ * the digest is read -- confirm against hash_buffer() and mdfour_result().
+ */
+void hash_result_as_bytes(struct mdfour *md, unsigned char *out)
+{
+       hash_buffer(md, NULL, 0);
+       mdfour_result(md, out);
+}
diff --git a/hashtable.c b/hashtable.c

new file mode 100644 (file)

index 0000000..68d34e8
--- /dev/null
+++ b/hashtable.c
@@ -0,0 +1,303 @@
+/*
+  Copyright (c) 2002, 2004, Christopher Clark
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of the original author; nor the names of any
+      contributors may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "hashtable.h"
+#include "hashtable_private.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+/*
+Credit for primes table: Aaron Krowne
+ http://br.endernet.org/~akrowne/
+ http://planetmath.org/encyclopedia/GoodHashTablePrimes.html
+*/
+/* Bucket-array sizes in increasing order; create_hashtable picks the first
+ * one above the requested minimum and hashtable_expand steps to the next. */
+static const unsigned int primes[] = {
+53, 97, 193, 389,
+769, 1543, 3079, 6151,
+12289, 24593, 49157, 98317,
+196613, 393241, 786433, 1572869,
+3145739, 6291469, 12582917, 25165843,
+50331653, 100663319, 201326611, 402653189,
+805306457, 1610612741
+};
+/* Number of entries in primes[]. */
+const unsigned int prime_table_length = sizeof(primes)/sizeof(primes[0]);
+/* Multiplied by the bucket count to obtain the load limit; an insert that
+ * pushes the entry count above that limit tries to expand the table. */
+const float max_load_factor = 0.65;
+
+/*****************************************************************************/
+/*
+ * Create an empty hashtable.
+ *
+ * The bucket count is the first entry of primes[] that is strictly greater
+ * than minsize. hashf and eqf are stored in the table and used later to hash
+ * and compare keys.
+ *
+ * Returns the new table, or NULL if minsize is larger than 2^30 or if memory
+ * could not be allocated.
+ */
+struct hashtable *
+create_hashtable(unsigned int minsize,
+                 unsigned int (*hashf) (void*),
+                 int (*eqf) (void*,void*))
+{
+    struct hashtable *table;
+    unsigned int prime_idx;
+    unsigned int nbuckets = primes[0];
+
+    /* Refuse requests for an unreasonably large table */
+    if (minsize > (1u << 30)) {
+        return NULL;
+    }
+
+    /* Use a prime number of buckets: the first prime above minsize */
+    for (prime_idx = 0; prime_idx < prime_table_length; prime_idx++) {
+        if (primes[prime_idx] > minsize) {
+            nbuckets = primes[prime_idx];
+            break;
+        }
+    }
+
+    table = (struct hashtable *)malloc(sizeof(struct hashtable));
+    if (table == NULL) {
+        return NULL; /* out of memory */
+    }
+
+    table->table = (struct entry **)malloc(sizeof(struct entry *) * nbuckets);
+    if (table->table == NULL) {
+        free(table);
+        return NULL; /* out of memory */
+    }
+    memset(table->table, 0, nbuckets * sizeof(struct entry *));
+
+    table->hashfn      = hashf;
+    table->eqfn        = eqf;
+    table->entrycount  = 0;
+    table->primeindex  = prime_idx;
+    table->tablelength = nbuckets;
+    table->loadlimit   = (unsigned int) ceil(nbuckets * max_load_factor);
+    return table;
+}
+
+/*****************************************************************************/
+/*
+ * Compute the table's internal hash value for key k.
+ *
+ * The result of the user-supplied hash function is run through additional
+ * bit mixing to protect against poor hash functions (logic taken from the
+ * java 1.4 hashtable source).
+ */
+unsigned int
+hash(struct hashtable *h, void *k)
+{
+    unsigned int mixed = h->hashfn(k);
+
+    mixed = mixed + ~(mixed << 9);
+    mixed = mixed ^ ((mixed >> 14) | (mixed << 18)); /* >>> */
+    mixed = mixed + (mixed << 4);
+    mixed = mixed ^ ((mixed >> 10) | (mixed << 22)); /* >>> */
+    return mixed;
+}
+
+/*****************************************************************************/
+/*
+ * Grow the bucket array to the next size in primes[] and redistribute all
+ * entries over the new buckets.
+ *
+ * Returns non-zero (-1) on success. Returns 0 without modifying the table if
+ * it is already at the largest size or if no memory could be obtained.
+ */
+static int
+hashtable_expand(struct hashtable *h)
+{
+    /* Move to the next (roughly doubled) prime size to accommodate more
+     * entries */
+    struct entry **newtable;
+    struct entry *e;
+    struct entry **pE;
+    unsigned int newsize, i, index;
+    /* Check we're not hitting max capacity */
+    if (h->primeindex == (prime_table_length - 1)) return 0;
+    newsize = primes[++(h->primeindex)];
+
+    newtable = (struct entry **)malloc(sizeof(struct entry*) * newsize);
+    if (NULL != newtable)
+    {
+        memset(newtable, 0, newsize * sizeof(struct entry *));
+        /* This algorithm is not 'stable'. ie. it reverses the list
+         * when it transfers entries between the tables */
+        for (i = 0; i < h->tablelength; i++) {
+            while (NULL != (e = h->table[i])) {
+                h->table[i] = e->next;
+                index = indexFor(newsize,e->h);
+                e->next = newtable[index];
+                newtable[index] = e;
+            }
+        }
+        free(h->table);
+        h->table = newtable;
+    }
+    /* Plan B: realloc instead */
+    else
+    {
+        newtable = (struct entry **)
+                   realloc(h->table, newsize * sizeof(struct entry *));
+        if (NULL == newtable) { (h->primeindex)--; return 0; }
+        h->table = newtable;
+        /* Clear only the newly added tail of the array. The destination is
+         * the address of the first new slot (not the pointer stored in it)
+         * and the length is given in bytes (not in slots). */
+        memset(&newtable[h->tablelength], 0,
+               (newsize - h->tablelength) * sizeof(struct entry *));
+        /* Re-bucket in place: entries whose index is unchanged stay in their
+         * chain, all others are unlinked and pushed onto their new chain */
+        for (i = 0; i < h->tablelength; i++) {
+            for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) {
+                index = indexFor(newsize,e->h);
+                if (index == i)
+                {
+                    pE = &(e->next);
+                }
+                else
+                {
+                    *pE = e->next;
+                    e->next = newtable[index];
+                    newtable[index] = e;
+                }
+            }
+        }
+    }
+    h->tablelength = newsize;
+    h->loadlimit   = (unsigned int) ceil(newsize * max_load_factor);
+    return -1;
+}
+
+/*****************************************************************************/
+/* Return the table's current entry count (h->entrycount). */
+unsigned int
+hashtable_count(struct hashtable *h)
+{
+    return h->entrycount;
+}
+
+/*****************************************************************************/
+/*
+ * Insert the pair (k, v) at the head of its bucket chain.
+ *
+ * Duplicate keys are not detected - they are allowed but shouldn't be used.
+ * Returns non-zero (-1) on success and 0 if the new entry could not be
+ * allocated.
+ */
+int
+hashtable_insert(struct hashtable *h, void *k, void *v)
+{
+    struct entry *node;
+    unsigned int bucket;
+
+    h->entrycount++;
+    if (h->entrycount > h->loadlimit)
+    {
+        /* The result of the expansion is deliberately ignored: even if no
+         * larger table could be allocated, one more element may still fit
+         * into the existing one. The next insert will try to expand
+         * again. */
+        hashtable_expand(h);
+    }
+
+    node = (struct entry *)malloc(sizeof(struct entry));
+    if (node == NULL)
+    {
+        /* out of memory: undo the count increment */
+        h->entrycount--;
+        return 0;
+    }
+
+    node->k = k;
+    node->v = v;
+    node->h = hash(h,k);
+    bucket = indexFor(h->tablelength,node->h);
+    node->next = h->table[bucket];
+    h->table[bucket] = node;
+    return -1;
+}
+
+/*****************************************************************************/
+/*
+ * Look up key k and return the value of the first matching entry in its
+ * bucket chain, or NULL if there is none.
+ */
+void * /* returns value associated with key */
+hashtable_search(struct hashtable *h, void *k)
+{
+    unsigned int wanted = hash(h,k);
+    unsigned int bucket = indexFor(h->tablelength,wanted);
+    struct entry *cur;
+
+    for (cur = h->table[bucket]; cur != NULL; cur = cur->next)
+    {
+        /* Compare the stored hash first; it is cheaper than eqfn */
+        if (wanted != cur->h) continue;
+        if (h->eqfn(k, cur->k)) return cur->v;
+    }
+    return NULL;
+}
+
+/*****************************************************************************/
+/*
+ * Remove the first entry matching key k from the table.
+ *
+ * The stored key is released with freekey() and the entry itself is freed.
+ * The value is not freed; it is returned to the caller. Returns NULL if no
+ * entry matches.
+ */
+void * /* returns value associated with key */
+hashtable_remove(struct hashtable *h, void *k)
+{
+    /* TODO: consider compacting the table when the load factor drops enough,
+     *       or provide a 'compact' method. */
+
+    struct entry *e;
+    struct entry **pE;
+    void *v;
+    unsigned int hashvalue, index;
+
+    /* Hash the key once and reuse the result for the bucket index */
+    hashvalue = hash(h,k);
+    index = indexFor(h->tablelength,hashvalue);
+    /* pE always points at the link that refers to e, so unlinking works the
+     * same way for the head of the chain and for later entries */
+    for (pE = &(h->table[index]); NULL != (e = *pE); pE = &(e->next))
+    {
+        /* Check hash value to short circuit heavier comparison */
+        if ((hashvalue == e->h) && (h->eqfn(k, e->k)))
+        {
+            *pE = e->next;
+            h->entrycount--;
+            v = e->v;
+            freekey(e->k);
+            free(e);
+            return v;
+        }
+    }
+    return NULL;
+}
+
+/*****************************************************************************/
+/*
+ * Destroy the table: every key is released with freekey(), every entry is
+ * freed, and finally the bucket array and the table itself are freed.
+ * Values are passed to free() only if free_values is non-zero.
+ */
+void
+hashtable_destroy(struct hashtable *h, int free_values)
+{
+    unsigned int bucket;
+
+    for (bucket = 0; bucket < h->tablelength; bucket++)
+    {
+        struct entry *cur = h->table[bucket];
+        while (cur != NULL)
+        {
+            struct entry *doomed = cur;
+            /* Step forward before the entry is released */
+            cur = cur->next;
+            freekey(doomed->k);
+            if (free_values)
+            {
+                free(doomed->v);
+            }
+            free(doomed);
+        }
+    }
+    free(h->table);
+    free(h);
+}
+
+/*
+ * Copyright (c) 2002, Christopher Clark
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 
+ * * Neither the name of the original author; nor the names of any contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
diff --git a/hashtable.h b/hashtable.h

new file mode 100644 (file)

index 0000000..3120af6
--- /dev/null
+++ b/hashtable.h
@@ -0,0 +1,228 @@
+/*
+  Copyright (c) 2002, 2004, Christopher Clark
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of the original author; nor the names of any
+      contributors may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __HASHTABLE_CWC22_H__
+#define __HASHTABLE_CWC22_H__
+
+struct hashtable;
+
+/* Example of use:
+ *
+ *      struct hashtable  *h;
+ *      struct some_key   *k;
+ *      struct some_value *v;
+ *
+ *      static unsigned int         hash_from_key_fn( void *k );
+ *      static int                  keys_equal_fn ( void *key1, void *key2 );
+ *
+ *      h = create_hashtable(16, hash_from_key_fn, keys_equal_fn);
+ *      k = (struct some_key *)     malloc(sizeof(struct some_key));
+ *      v = (struct some_value *)   malloc(sizeof(struct some_value));
+ *
+ *      (initialise k and v to suitable values)
+ * 
+ *      if (! hashtable_insert(h,k,v) )
+ *      {     exit(-1);               }
+ *
+ *      if (NULL == (found = hashtable_search(h,k) ))
+ *      {    printf("not found!");                  }
+ *
+ *      if (NULL == (found = hashtable_remove(h,k) ))
+ *      {    printf("Not found\n");                 }
+ *
+ */
+
+/* Macros may be used to define type-safe(r) hashtable access functions, with
+ * methods specialized to take known key and value types as parameters.
+ * 
+ * Example:
+ *
+ * Insert this at the start of your file:
+ *
+ * DEFINE_HASHTABLE_INSERT(insert_some, struct some_key, struct some_value);
+ * DEFINE_HASHTABLE_SEARCH(search_some, struct some_key, struct some_value);
+ * DEFINE_HASHTABLE_REMOVE(remove_some, struct some_key, struct some_value);
+ *
+ * This defines the functions 'insert_some', 'search_some' and 'remove_some'.
+ * These operate just like hashtable_insert etc., with the same parameters,
+ * but their function signatures have 'struct some_key *' rather than
+ * 'void *', and hence can generate compile time errors if your program is
+ * supplying incorrect data as a key (and similarly for value).
+ *
+ * Note that the hash and key equality functions passed to create_hashtable
+ * still take 'void *' parameters instead of 'struct some_key *'. This shouldn't be
+ * a difficult issue as they're only defined and passed once, and the other
+ * functions will ensure that only valid keys are supplied to them.
+ *
+ * The cost for this checking is increased code size and runtime overhead
+ * - if performance is important, it may be worth switching back to the
+ * unsafe methods once your program has been debugged with the safe methods.
+ * This just requires switching to some simple alternative defines - eg:
+ * #define insert_some hashtable_insert
+ *
+ */
+
+/*****************************************************************************
+ * create_hashtable
+   
+ * @name                    create_hashtable
+ * @param   minsize         minimum initial size of hashtable
+ * @param   hashfunction    function for hashing keys
+ * @param   key_eq_fn       function for determining key equality
+ * @return                  newly created hashtable or NULL on failure
+ */
+
+struct hashtable *
+create_hashtable(unsigned int minsize,
+                 unsigned int (*hashfunction) (void*),
+                 int (*key_eq_fn) (void*,void*));
+
+/*****************************************************************************
+ * hashtable_insert
+   
+ * @name        hashtable_insert
+ * @param   h   the hashtable to insert into
+ * @param   k   the key - hashtable claims ownership and will free on removal
+ * @param   v   the value - does not claim ownership
+ * @return      non-zero for successful insertion
+ *
+ * This function will cause the table to expand if the insertion would take
+ * the ratio of entries to table size over the maximum load factor.
+ *
+ * This function does not check for repeated insertions with a duplicate key.
+ * The value returned when using a duplicate key is undefined -- when
+ * the hashtable changes size, the order of retrieval of duplicate key
+ * entries is reversed.
+ * If in doubt, remove before insert.
+ */
+
+int 
+hashtable_insert(struct hashtable *h, void *k, void *v);
+
+/* Defines a function called fnname that forwards to hashtable_insert but
+ * takes 'keytype *' and 'valuetype *' arguments instead of 'void *'. */
+#define DEFINE_HASHTABLE_INSERT(fnname, keytype, valuetype) \
+int fnname (struct hashtable *h, keytype *k, valuetype *v) \
+{ \
+    return hashtable_insert(h,k,v); \
+}
+
+/*****************************************************************************
+ * hashtable_search
+   
+ * @name        hashtable_search
+ * @param   h   the hashtable to search
+ * @param   k   the key to search for  - does not claim ownership
+ * @return      the value associated with the key, or NULL if none found
+ */
+
+void *
+hashtable_search(struct hashtable *h, void *k);
+
+/* Defines a function called fnname that forwards to hashtable_search, takes
+ * a 'keytype *' key and returns the result cast to 'valuetype *'. */
+#define DEFINE_HASHTABLE_SEARCH(fnname, keytype, valuetype) \
+valuetype * fnname (struct hashtable *h, keytype *k) \
+{ \
+    return (valuetype *) (hashtable_search(h,k)); \
+}
+
+/*****************************************************************************
+ * hashtable_remove
+   
+ * @name        hashtable_remove
+ * @param   h   the hashtable to remove the item from
+ * @param   k   the key to search for  - does not claim ownership
+ * @return      the value associated with the key, or NULL if none found
+ */
+
+void * /* returns value */
+hashtable_remove(struct hashtable *h, void *k);
+
+/* Defines a function called fnname that forwards to hashtable_remove, takes
+ * a 'keytype *' key and returns the result cast to 'valuetype *'. */
+#define DEFINE_HASHTABLE_REMOVE(fnname, keytype, valuetype) \
+valuetype * fnname (struct hashtable *h, keytype *k) \
+{ \
+    return (valuetype *) (hashtable_remove(h,k)); \
+}
+
+
+/*****************************************************************************
+ * hashtable_count
+   
+ * @name        hashtable_count
+ * @param   h   the hashtable
+ * @return      the number of items stored in the hashtable
+ */
+unsigned int
+hashtable_count(struct hashtable *h);
+
+
+/*****************************************************************************
+ * hashtable_destroy
+   
+ * @name        hashtable_destroy
+ * @param   h   the hashtable
+ * @param       free_values     whether to call 'free' on the remaining values
+ */
+
+void
+hashtable_destroy(struct hashtable *h, int free_values);
+
+#endif /* __HASHTABLE_CWC22_H__ */
+
+/*
+ * Copyright (c) 2002, Christopher Clark
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 
+ * * Neither the name of the original author; nor the names of any contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
diff --git a/hashtable_itr.c b/hashtable_itr.c

new file mode 100644 (file)

index 0000000..5dced84
--- /dev/null
+++ b/hashtable_itr.c
@@ -0,0 +1,188 @@
+/* Copyright (C) 2002, 2004 Christopher Clark  <firstname.lastname@cl.cam.ac.uk> */
+
+#include "hashtable.h"
+#include "hashtable_private.h"
+#include "hashtable_itr.h"
+#include <stdlib.h> /* defines NULL */
+
+/*****************************************************************************/
+/* hashtable_iterator    - iterator constructor */
+
+/*
+ * Allocate an iterator for h, positioned at the first entry of the first
+ * non-empty bucket.
+ *
+ * If the table holds no entries the iterator has no current entry and its
+ * index equals the table length. Returns NULL if the iterator could not be
+ * allocated.
+ */
+struct hashtable_itr *
+hashtable_iterator(struct hashtable *h)
+{
+    unsigned int bucket;
+    unsigned int nbuckets = h->tablelength;
+    struct hashtable_itr *itr;
+
+    itr = (struct hashtable_itr *)malloc(sizeof(struct hashtable_itr));
+    if (itr == NULL)
+    {
+        return NULL;
+    }
+
+    /* Start out in the "end of table" state */
+    itr->h = h;
+    itr->e = NULL;
+    itr->parent = NULL;
+    itr->index = nbuckets;
+
+    if (h->entrycount != 0)
+    {
+        for (bucket = 0; bucket < nbuckets; bucket++)
+        {
+            if (h->table[bucket] == NULL) continue;
+            itr->e = h->table[bucket];
+            itr->index = bucket;
+            break;
+        }
+    }
+    return itr;
+}
+
+/*****************************************************************************/
+/* hashtable_iterator_key   - key of the (key,value) pair the iterator is
+ *                            currently positioned at
+ * hashtable_iterator_value - value of that same pair */
+
+void *
+hashtable_iterator_key(struct hashtable_itr *i)
+{
+    return i->e->k;
+}
+
+void *
+hashtable_iterator_value(struct hashtable_itr *i)
+{
+    return i->e->v;
+}
+
+/*****************************************************************************/
+/* advance - advance the iterator to the next element
+ *           returns zero if advanced to end of table
+ *           (and non-zero, -1, if the iterator now refers to an element) */
+
+int
+hashtable_iterator_advance(struct hashtable_itr *itr)
+{
+    unsigned int j,tablelength;
+    struct entry **table;
+    struct entry *next;
+    if (NULL == itr->e) return 0; /* stupidity check */
+
+    /* First try the next entry in the current bucket's chain */
+    next = itr->e->next;
+    if (NULL != next)
+    {
+        itr->parent = itr->e;
+        itr->e = next;
+        return -1;
+    }
+    /* Chain exhausted: continue with the following buckets. The next
+     * current entry, if any, will be the head of its chain, so it has no
+     * parent. */
+    tablelength = itr->h->tablelength;
+    itr->parent = NULL;
+    if (tablelength <= (j = ++(itr->index)))
+    {
+        itr->e = NULL;
+        return 0;
+    }
+    table = itr->h->table;
+    /* Skip empty buckets until an entry is found or the table ends */
+    while (NULL == (next = table[j]))
+    {
+        if (++j >= tablelength)
+        {
+            itr->index = tablelength;
+            itr->e = NULL;
+            return 0;
+        }
+    }
+    itr->index = j;
+    itr->e = next;
+    return -1;
+}
+
+/*****************************************************************************/
+/* remove - remove the entry at the current iterator position
+ *          and advance the iterator, if there is a successive
+ *          element.
+ *          The key is released with freekey() and the entry is freed;
+ *          the value is not touched.
+ *          If you want the value, read it before you remove:
+ *          beware memory leaks if you don't.
+ *          Returns zero if end of iteration. */
+
+int
+hashtable_iterator_remove(struct hashtable_itr *itr)
+{
+    struct entry *remember_e, *remember_parent;
+    int ret;
+
+    /* Do the removal */
+    if (NULL == (itr->parent))
+    {
+        /* element is head of a chain */
+        itr->h->table[itr->index] = itr->e->next;
+    } else {
+        /* element is mid-chain */
+        itr->parent->next = itr->e->next;
+    }
+    /* itr->e is now outside the hashtable */
+    remember_e = itr->e;
+    itr->h->entrycount--;
+    freekey(remember_e->k);
+
+    /* Advance the iterator, correcting the parent */
+    /* (the unlinked entry's next pointer is still intact, so advancing from
+     * it works; if advance recorded the removed entry as the new parent,
+     * the entry that preceded it is restored instead) */
+    remember_parent = itr->parent;
+    ret = hashtable_iterator_advance(itr);
+    if (itr->parent == remember_e) { itr->parent = remember_parent; }
+    free(remember_e);
+    return ret;
+}
+
+/*****************************************************************************/
+/*
+ * Position itr at the first entry of h that matches key k.
+ *
+ * On a match all iterator fields are set and non-zero (-1) is returned.
+ * If nothing matches, itr is left untouched and zero is returned.
+ */
+int /* returns zero if not found */
+hashtable_iterator_search(struct hashtable_itr *itr,
+                          struct hashtable *h, void *k)
+{
+    unsigned int wanted = hash(h,k);
+    unsigned int bucket = indexFor(h->tablelength,wanted);
+    struct entry *prev = NULL;
+    struct entry *cur;
+
+    for (cur = h->table[bucket]; cur != NULL; prev = cur, cur = cur->next)
+    {
+        /* Compare the stored hash first; it is cheaper than eqfn */
+        if (wanted != cur->h) continue;
+        if (!h->eqfn(k, cur->k)) continue;
+
+        itr->h = h;
+        itr->index = bucket;
+        itr->e = cur;
+        itr->parent = prev;
+        return -1;
+    }
+    return 0;
+}
+
+
+/*
+ * Copyright (c) 2002, 2004, Christopher Clark
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 
+ * * Neither the name of the original author; nor the names of any contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
diff --git a/hashtable_itr.h b/hashtable_itr.h

new file mode 100644 (file)

index 0000000..eea699a
--- /dev/null
+++ b/hashtable_itr.h
@@ -0,0 +1,112 @@
+/* Copyright (C) 2002, 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */
+
+#ifndef __HASHTABLE_ITR_CWC22__
+#define __HASHTABLE_ITR_CWC22__
+#include "hashtable.h"
+#include "hashtable_private.h" /* needed to enable inlining */
+
+/*****************************************************************************/
+/* This struct is only concrete here to allow the inlining of two of the
+ * accessor functions. */
+struct hashtable_itr
+{
+    struct hashtable *h;    /* table being iterated */
+    struct entry *e;        /* entry at the current position */
+    struct entry *parent;   /* entry visited just before e in the same chain
+                             * (this is what the search routine stores) */
+    unsigned int index;     /* presumably the slot of h->table that holds e;
+                             * confirm in hashtable_itr.c */
+};
+
+
+/*****************************************************************************/
+/* hashtable_iterator
+ */
+
+struct hashtable_itr *
+hashtable_iterator(struct hashtable *h);
+
+/*****************************************************************************/
+/* hashtable_iterator_key
+ * - return the key of the (key,value) pair at the current position */
+
+extern inline void *
+hashtable_iterator_key(struct hashtable_itr *i)
+{
+    struct entry *current = i->e;
+
+    return current->k;
+}
+
+/*****************************************************************************/
+/* hashtable_iterator_value
+ * - return the value of the (key,value) pair at the current position */
+
+extern inline void *
+hashtable_iterator_value(struct hashtable_itr *i)
+{
+    struct entry *current = i->e;
+
+    return current->v;
+}
+
+/*****************************************************************************/
+/* advance - advance the iterator to the next element
+ *           returns zero if advanced to end of table */
+
+int
+hashtable_iterator_advance(struct hashtable_itr *itr);
+
+/*****************************************************************************/
+/* remove - remove current element and advance the iterator to the next element
+ *          NB: if you need the value to free it, read it before
+ *          removing. ie: beware memory leaks!
+ *          returns zero if advanced to end of table */
+
+int
+hashtable_iterator_remove(struct hashtable_itr *itr);
+
+/*****************************************************************************/
+/* search - overwrite the supplied iterator, to point to the entry
+ *          matching the supplied key.
+ *          h points to the hashtable to be searched.
+ *          returns zero if not found. */
+int
+hashtable_iterator_search(struct hashtable_itr *itr,
+                          struct hashtable *h, void *k);
+
+/* Defines a function named fnname that forwards to hashtable_iterator_search()
+ * but declares its key parameter as "keytype *" instead of "void *". */
+#define DEFINE_HASHTABLE_ITERATOR_SEARCH(fnname, keytype) \
+int fnname (struct hashtable_itr *i, struct hashtable *h, keytype *k) \
+{ \
+    return (hashtable_iterator_search(i,h,k)); \
+}
+
+
+
+#endif /* __HASHTABLE_ITR_CWC22__*/
+
+/*
+ * Copyright (c) 2002, 2004, Christopher Clark
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 
+ * * Neither the name of the original author; nor the names of any contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
diff --git a/hashtable_private.h b/hashtable_private.h

new file mode 100644 (file)

index 0000000..3e95f60
--- /dev/null
+++ b/hashtable_private.h
@@ -0,0 +1,85 @@
+/* Copyright (C) 2002, 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */
+
+#ifndef __HASHTABLE_PRIVATE_CWC22_H__
+#define __HASHTABLE_PRIVATE_CWC22_H__
+
+#include "hashtable.h"
+
+/*****************************************************************************/
+struct entry
+{
+    void *k, *v;        /* key and value pointers */
+    unsigned int h;     /* hash value; lookups compare it before calling eqfn */
+    struct entry *next; /* next entry in the same chain */
+};
+
+struct hashtable {
+    unsigned int tablelength;           /* presumably the number of slots in table (cf. indexFor) -- confirm in hashtable.c */
+    struct entry **table;               /* presumably the array of chain heads -- confirm in hashtable.c */
+    unsigned int entrycount;
+    unsigned int loadlimit;
+    unsigned int primeindex;
+    unsigned int (*hashfn) (void *k);   /* key hash callback */
+    int (*eqfn) (void *k1, void *k2);   /* key equality callback; nonzero means the keys match */
+};
+
+/*****************************************************************************/
+unsigned int
+hash(struct hashtable *h, void *k);
+
+/*****************************************************************************/
+/* indexFor
+ * - map a hash value onto a slot number in the range [0, tablelength).
+ *   tablelength must be non-zero, otherwise the modulo divides by zero. */
+static inline unsigned int
+indexFor(unsigned int tablelength, unsigned int hashvalue) {
+    return (hashvalue % tablelength);
+}
+
+/* Only works if tablelength == 2^N */
+/*static inline unsigned int
+indexFor(unsigned int tablelength, unsigned int hashvalue)
+{
+    return (hashvalue & (tablelength - 1u));
+}
+*/
+
+/*****************************************************************************/
+/* freekey(X) expands to free(X); the commented-out alternative below expands
+ * to an empty statement instead. */
+#define freekey(X) free(X)
+/*define freekey(X) ; */
+
+
+/*****************************************************************************/
+
+#endif /* __HASHTABLE_PRIVATE_CWC22_H__*/
+
+/*
+ * Copyright (c) 2002, Christopher Clark
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 
+ * * Neither the name of the original author; nor the names of any contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
diff --git a/hashutil.c b/hashutil.c

new file mode 100644 (file)

index 0000000..5989f76
--- /dev/null
+++ b/hashutil.c
@@ -0,0 +1,14 @@
+#include <string.h>
+
+#include "hashutil.h"
+#include "murmurhashneutral2.h"
+
+/* Hash a NUL-terminated string with murmurhashneutral2, using seed 0. */
+unsigned int hash_from_string(void *str)
+{
+       const char *text = (const char *)str;
+       size_t length = strlen(text);
+
+       return murmurhashneutral2(text, length, 0);
+}
+
+/* Return 1 when both NUL-terminated strings have the same contents, else 0. */
+int strings_equal(void *str1, void *str2)
+{
+       const char *lhs = (const char *)str1;
+       const char *rhs = (const char *)str2;
+
+       if (strcmp(lhs, rhs) != 0) {
+               return 0;
+       }
+       return 1;
+}
diff --git a/hashutil.h b/hashutil.h

new file mode 100644 (file)

index 0000000..2fdd63e
--- /dev/null
+++ b/hashutil.h
@@ -0,0 +1,7 @@
+#ifndef HASHUTIL_H
+#define HASHUTIL_H
+
+/* Hash callback for NUL-terminated string keys. */
+unsigned int hash_from_string(void *str);
+/* Equality callback for NUL-terminated string keys; nonzero when equal. */
+int strings_equal(void *str1, void *str2);
+
+#endif
diff --git a/manifest.c b/manifest.c

new file mode 100644 (file)

index 0000000..db8dca3
--- /dev/null
+++ b/manifest.c
@@ -0,0 +1,639 @@
+/*
+ * Copyright (C) Joel Rosdahl 2009
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "ccache.h"
+#include "hashtable_itr.h"
+#include "hashutil.h"
+#include "manifest.h"
+#include "murmurhashneutral2.h"
+
+extern char *temp_dir;
+
+/*
+ * Sketchy specification of the manifest disk format:
+ *
+ * <magic>         magic number                        (4 bytes)
+ * <version>       version                             (2 bytes unsigned int)
+ * ----------------------------------------------------------------------------
+ * <n>             number of include file paths        (2 bytes unsigned int)
+ * <path_0>        path to include file                (NUL-terminated string,
+ * ...                                                  at most 1024 bytes)
+ * <path_n-1>
+ * ----------------------------------------------------------------------------
+ * <n>             number of include file hash entries (2 bytes unsigned int)
+ * <index[0]>      index of include file path          (2 bytes unsigned int)
+ * <hash[0]>       hash of include file                (16 bytes)
+ * <size[0]>       size of include file                (4 bytes unsigned int)
+ * ...
+ * <index[n-1]>
+ * <hash[n-1]>
+ * <size[n-1]>
+ * ----------------------------------------------------------------------------
+ * <n>             number of object name entries       (2 bytes unsigned int)
+ * <m[0]>          number of include file hash indexes (2 bytes unsigned int)
+ * <index[0][0]>   include file hash index             (2 bytes unsigned int)
+ * ...
+ * <index[0][m[0]-1]>
+ * <hash[0]>       hash part of object name            (16 bytes)
+ * <size[0]>       size part of object name            (4 bytes unsigned int)
+ * ...
+ * <m[n-1]>        number of include file hash indexes
+ * <index[n-1][0]> include file hash index
+ * ...
+ * <index[n-1][m[n-1]-1]>
+ * <hash[n-1]>
+ * <size[n-1]>
+ */
+
+/* Magic number stored in the first 4 bytes of a manifest file. */
+#define MAGIC 0x63436d46U
+/* Format version stored in the 2 bytes after the magic number. */
+#define VERSION 0
+
+/*
+ * Compile-time check for use inside a function body: when e is 0 the enum
+ * initializer divides by zero, which is not a valid constant expression.
+ */
+#define static_assert(e) do { enum { static_assert__ = 1/(e) }; } while (0)
+
+/*
+ * One (include file, content hash, size) combination. The struct is hashed
+ * bytewise by hash_from_file_info(), so it must stay free of padding.
+ */
+struct file_info
+{
+       /* Index into the manifest's files array. */
+       uint32_t index;
+       /* Hash of referenced file. */
+       uint8_t hash[16];
+       /* Size of referenced file. */
+       uint32_t size;
+};
+
+/*
+ * One object entry: the object's hash plus the file_info entries that
+ * verify_object() checks before the object is accepted.
+ */
+struct object
+{
+       /* Number of entries in file_info_indexes. */
+       uint32_t n_file_info_indexes;
+       /* Indexes into the manifest's file_infos array. */
+       uint32_t *file_info_indexes;
+       /* Hash of the object itself. */
+       struct file_hash hash;
+};
+
+/* In-memory form of a manifest file; each array is paired with its count. */
+struct manifest
+{
+       /* Referenced include files. */
+       uint32_t n_files;
+       char **files;
+
+       /* Information about referenced include files. */
+       uint32_t n_file_infos;
+       struct file_info *file_infos;
+
+       /* Object names plus references to include file hashes. */
+       uint32_t n_objects;
+       struct object *objects;
+};
+
+/* Hashtable hash callback: hash the raw bytes of a struct file_info key. */
+static unsigned int hash_from_file_info(void *key)
+{
+       const int key_size = sizeof(struct file_info);
+
+       /* The bytes are hashed as-is, so the struct must not contain padding. */
+       static_assert(sizeof(struct file_info) == 24);
+       return murmurhashneutral2(key, key_size, 0);
+}
+
+/*
+ * Hashtable equality callback for struct file_info keys: nonzero when index,
+ * hash and size all match.
+ */
+static int file_infos_equal(void *key1, void *key2)
+{
+       const struct file_info *a = key1;
+       const struct file_info *b = key2;
+
+       if (a->index != b->index) {
+               return 0;
+       }
+       if (memcmp(a->hash, b->hash, 16) != 0) {
+               return 0;
+       }
+       return a->size == b->size;
+}
+
+/*
+ * Release a manifest and everything it owns: the path strings, the files,
+ * file_infos and objects arrays, each object's index array and finally the
+ * struct itself. The pointer must not be used afterwards. NULL is ignored.
+ */
+static void free_manifest(struct manifest *mf)
+{
+       /* Same width as the counters, so the loops cannot wrap around. */
+       uint32_t i;
+
+       if (!mf) {
+               return;
+       }
+       /*
+        * A partially read manifest can carry a count without an array, so
+        * only walk arrays that were actually allocated.
+        */
+       if (mf->files) {
+               for (i = 0; i < mf->n_files; i++) {
+                       free(mf->files[i]);
+               }
+               free(mf->files);
+       }
+       free(mf->file_infos);
+       if (mf->objects) {
+               for (i = 0; i < mf->n_objects; i++) {
+                       free(mf->objects[i].file_info_indexes);
+               }
+               free(mf->objects);
+       }
+       /* The struct itself is heap-allocated by read_manifest(). */
+       free(mf);
+}
+
+/*
+ * Read a size-byte unsigned integer, most significant byte first, from the
+ * stream f into var. Jumps to the label "error" on EOF, in which case var may
+ * hold a partially read value. Both f and the label must exist where the
+ * macro is expanded.
+ */
+#define READ_INT(size, var)                            \
+       do {                                            \
+               int ch_;                                \
+               size_t i_;                              \
+               (var) = 0;                              \
+               for (i_ = 0; i_ < (size); i_++) {       \
+                       ch_ = getc(f);                  \
+                       if (ch_ == EOF) {               \
+                               goto error;             \
+                       }                               \
+                       (var) <<= 8;                    \
+                       (var) |= ch_ & 0xFF;            \
+               }                                       \
+       } while (0)
+
+/*
+ * Read a NUL-terminated string from the stream f and store a copy made with
+ * x_strdup() in var. Jumps to the label "error" on EOF or when no NUL is
+ * found within the 1024-byte buffer; var is left untouched in that case.
+ */
+#define READ_STR(var)                                  \
+       do {                                            \
+               char buf_[1024];                        \
+               size_t i_;                              \
+               int ch_;                                \
+               for (i_ = 0; i_ < sizeof(buf_); i_++) { \
+                       ch_ = getc(f);                  \
+                       if (ch_ == EOF) {               \
+                               goto error;             \
+                       }                               \
+                       buf_[i_] = ch_;                 \
+                       if (ch_ == '\0') {              \
+                               break;                  \
+                       }                               \
+               }                                       \
+               if (i_ == sizeof(buf_)) {               \
+                       goto error;                     \
+               }                                       \
+               (var) = x_strdup(buf_);                 \
+       } while (0)
+
+/*
+ * Read n bytes from the stream f into var[0..n-1]. Jumps to the label "error"
+ * on EOF.
+ */
+#define READ_BYTES(n, var)                     \
+       do {                                    \
+               size_t i_;                      \
+               int ch_;                        \
+               for (i_ = 0; i_ < (n); i_++) {  \
+                       ch_ = getc(f);          \
+                       if (ch_ == EOF) {       \
+                               goto error;     \
+                       }                       \
+                       (var)[i_] = ch_;        \
+               }                               \
+       } while (0)
+
+/*
+ * Parse a manifest from the stream f.
+ *
+ * Returns a newly allocated manifest that the caller releases with
+ * free_manifest(). A file of size 0 yields an empty manifest. Returns NULL
+ * when fstat() fails, the magic number or version is wrong, the file is
+ * truncated, a path is too long, or a stored index points outside the table
+ * it refers to.
+ */
+static struct manifest *read_manifest(FILE *f)
+{
+       struct stat st;
+       struct manifest *mf;
+       uint32_t i, j;
+       uint32_t count;
+       size_t n;
+       uint32_t magic, version;
+
+       if (fstat(fileno(f), &st) != 0) {
+               return NULL;
+       }
+
+       mf = x_malloc(sizeof(*mf));
+       mf->n_files = 0;
+       mf->files = NULL;
+       mf->n_file_infos = 0;
+       mf->file_infos = NULL;
+       mf->n_objects = 0;
+       mf->objects = NULL;
+
+       if (st.st_size == 0) {
+               /* New file. */
+               return mf;
+       }
+
+       READ_INT(4, magic);
+       if (magic != MAGIC) {
+               cc_log("Manifest file has bad magic number %u\n", magic);
+               free_manifest(mf);
+               return NULL;
+       }
+       READ_INT(2, version);
+       if (version != VERSION) {
+               cc_log("Manifest file has unknown version %u\n", version);
+               free_manifest(mf);
+               return NULL;
+       }
+
+       /*
+        * Each count is read into a local and published only after its
+        * zero-filled array exists. READ_INT can bail out halfway through, and
+        * the error path must never see a non-zero count with a NULL array.
+        */
+       READ_INT(2, count);
+       n = count * sizeof(*mf->files);
+       mf->files = x_malloc(n);
+       memset(mf->files, 0, n);
+       mf->n_files = count;
+       for (i = 0; i < mf->n_files; i++) {
+               READ_STR(mf->files[i]);
+       }
+
+       READ_INT(2, count);
+       n = count * sizeof(*mf->file_infos);
+       mf->file_infos = x_malloc(n);
+       memset(mf->file_infos, 0, n);
+       mf->n_file_infos = count;
+       for (i = 0; i < mf->n_file_infos; i++) {
+               READ_INT(2, mf->file_infos[i].index);
+               if (mf->file_infos[i].index >= mf->n_files) {
+                       /* Would index past the end of mf->files. */
+                       goto error;
+               }
+               READ_BYTES(16, mf->file_infos[i].hash);
+               READ_INT(4, mf->file_infos[i].size);
+       }
+
+       READ_INT(2, count);
+       n = count * sizeof(*mf->objects);
+       mf->objects = x_malloc(n);
+       memset(mf->objects, 0, n);
+       mf->n_objects = count;
+       for (i = 0; i < mf->n_objects; i++) {
+               READ_INT(2, count);
+               n = count * sizeof(*mf->objects[i].file_info_indexes);
+               mf->objects[i].file_info_indexes = x_malloc(n);
+               memset(mf->objects[i].file_info_indexes, 0, n);
+               mf->objects[i].n_file_info_indexes = count;
+               for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
+                       READ_INT(2, mf->objects[i].file_info_indexes[j]);
+                       if (mf->objects[i].file_info_indexes[j]
+                           >= mf->n_file_infos) {
+                               /* Would index past mf->file_infos. */
+                               goto error;
+                       }
+               }
+               READ_BYTES(16, mf->objects[i].hash.hash);
+               READ_INT(4, mf->objects[i].hash.size);
+       }
+
+       return mf;
+
+error:
+       cc_log("Corrupt manifest file\n");
+       free_manifest(mf);
+       return NULL;
+}
+
+/*
+ * Write the low size bytes of var to the stream f, most significant byte
+ * first. Jumps to the label "error" when putc() fails. Both f and the label
+ * must exist where the macro is expanded.
+ */
+#define WRITE_INT(size, var)                                           \
+       do {                                                            \
+               char ch_;                                               \
+               size_t i_;                                              \
+               for (i_ = 0; i_ < (size); i_++) {                       \
+                       ch_ = ((var) >> (8 * ((size) - i_ - 1)));       \
+                       if (putc(ch_, f) == EOF) {                      \
+                               goto error;                             \
+                       }                                               \
+               }                                                       \
+       } while (0)
+
+/*
+ * Write the string var followed by a terminating NUL byte to the stream f.
+ * Jumps to the label "error" when either write fails.
+ */
+#define WRITE_STR(var)                                                 \
+       do {                                                            \
+               if (fputs(var, f) == EOF || putc('\0', f) == EOF) {     \
+                       goto error;                                     \
+               }                                                       \
+       } while (0)
+
+/*
+ * Write var[0..n-1] to the stream f, one putc() per element. Jumps to the
+ * label "error" when a write fails.
+ */
+#define WRITE_BYTES(n, var)                                    \
+       do {                                                    \
+               size_t i_;                                      \
+               for (i_ = 0; i_ < (n); i_++) {                  \
+                       if (putc((var)[i_], f) == EOF) {        \
+                               goto error;                     \
+                       }                                       \
+               }                                               \
+       } while (0)
+
+/*
+ * Serialize mf to the stream f: magic, version, the include file paths, the
+ * file info records and finally the object entries.
+ *
+ * Returns 1 on success. Returns 0 when a write fails or when any count does
+ * not fit in the 2 bytes reserved for it; in the latter case nothing has been
+ * written.
+ */
+static int write_manifest(FILE *f, const struct manifest *mf)
+{
+       /* Same width as the counters, so the loops cannot wrap around. */
+       uint32_t i, j;
+
+       /*
+        * Every count (and therefore every index) is stored in 2 bytes;
+        * writing a larger value would silently truncate it.
+        */
+       if (mf->n_files > UINT16_MAX
+           || mf->n_file_infos > UINT16_MAX
+           || mf->n_objects > UINT16_MAX) {
+               goto too_large;
+       }
+       for (i = 0; i < mf->n_objects; i++) {
+               if (mf->objects[i].n_file_info_indexes > UINT16_MAX) {
+                       goto too_large;
+               }
+       }
+
+       WRITE_INT(4, MAGIC);
+       WRITE_INT(2, VERSION);
+
+       WRITE_INT(2, mf->n_files);
+       for (i = 0; i < mf->n_files; i++) {
+               WRITE_STR(mf->files[i]);
+       }
+
+       WRITE_INT(2, mf->n_file_infos);
+       for (i = 0; i < mf->n_file_infos; i++) {
+               WRITE_INT(2, mf->file_infos[i].index);
+               WRITE_BYTES(16, mf->file_infos[i].hash);
+               WRITE_INT(4, mf->file_infos[i].size);
+       }
+
+       WRITE_INT(2, mf->n_objects);
+       for (i = 0; i < mf->n_objects; i++) {
+               WRITE_INT(2, mf->objects[i].n_file_info_indexes);
+               for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
+                       WRITE_INT(2, mf->objects[i].file_info_indexes[j]);
+               }
+               WRITE_BYTES(16, mf->objects[i].hash.hash);
+               WRITE_INT(4, mf->objects[i].hash.size);
+       }
+
+       return 1;
+
+too_large:
+       cc_log("Manifest has too many entries to be stored\n");
+       return 0;
+
+error:
+       cc_log("Error writing to manifest file\n");
+       return 0;
+}
+
+/*
+ * Check whether every include file referenced by obj still has the hash and
+ * size recorded in the manifest. Hashes computed along the way are stored in
+ * hashed_files (path --> struct file_hash) and reused on later lookups.
+ * Returns 1 when all files match, 0 on a mismatch or when a file cannot be
+ * hashed.
+ */
+static int verify_object(struct manifest *mf, struct object *obj,
+                         struct hashtable *hashed_files)
+{
+       uint32_t pos;
+
+       for (pos = 0; pos < obj->n_file_info_indexes; pos++) {
+               struct file_info *info =
+                       &mf->file_infos[obj->file_info_indexes[pos]];
+               char *path = mf->files[info->index];
+               struct file_hash *current =
+                       hashtable_search(hashed_files, path);
+
+               if (!current) {
+                       /* Not hashed yet during this lookup: do it now. */
+                       struct mdfour md;
+
+                       current = x_malloc(sizeof(*current));
+                       hash_start(&md);
+                       if (!hash_file(&md, path)) {
+                               cc_log("Failed hashing %s\n",
+                                      path);
+                               free(current);
+                               return 0;
+                       }
+                       hash_result_as_bytes(&md, current->hash);
+                       current->size = md.totalN;
+                       hashtable_insert(hashed_files,
+                                        x_strdup(path),
+                                        current);
+               }
+               if (!(memcmp(info->hash, current->hash, 16) == 0
+                     && info->size == current->size)) {
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
+/*
+ * Build a hashtable that maps a copy of each of the len strings to a
+ * heap-allocated uint32_t holding that string's position in the array.
+ */
+static struct hashtable *create_string_index_map(char **strings, uint32_t len)
+{
+       struct hashtable *map =
+               create_hashtable(1000, hash_from_string, strings_equal);
+       uint32_t pos = 0;
+
+       while (pos < len) {
+               uint32_t *slot = x_malloc(sizeof(*slot));
+
+               *slot = pos;
+               hashtable_insert(map, x_strdup(strings[pos]), slot);
+               pos++;
+       }
+       return map;
+}
+
+/*
+ * Build a hashtable that maps a copy of each of the len file_info records to
+ * a heap-allocated uint32_t holding that record's position in the array.
+ */
+static struct hashtable *create_file_info_index_map(struct file_info *infos,
+                                                    uint32_t len)
+{
+       struct hashtable *map =
+               create_hashtable(1000, hash_from_file_info, file_infos_equal);
+       uint32_t pos = 0;
+
+       while (pos < len) {
+               struct file_info *key = x_malloc(sizeof(*key));
+               uint32_t *slot;
+
+               *key = infos[pos];
+               slot = x_malloc(sizeof(*slot));
+               *slot = pos;
+               hashtable_insert(map, key, slot);
+               pos++;
+       }
+       return map;
+}
+
+/*
+ * Return the index of path in mf->files. The path is looked up in mf_files
+ * (path --> index); when it is not found, a copy is appended to mf->files and
+ * the index of the new slot is returned. mf_files itself is not updated.
+ */
+static uint32_t get_include_file_index(struct manifest *mf,
+                                       char *path,
+                                       struct hashtable *mf_files)
+{
+       uint32_t *known = hashtable_search(mf_files, path);
+       uint32_t new_index;
+
+       if (known) {
+               return *known;
+       }
+
+       new_index = mf->n_files;
+       mf->files = x_realloc(mf->files,
+                             (new_index + 1) * sizeof(*mf->files));
+       mf->n_files = new_index + 1;
+       mf->files[new_index] = x_strdup(path);
+
+       return new_index;
+}
+
+/*
+ * Return the index in mf->file_infos of the record describing path with the
+ * given hash and size. The record is looked up in mf_file_infos
+ * (struct file_info --> index); when it is not found, it is appended to
+ * mf->file_infos and the index of the new slot is returned. The path itself
+ * is resolved (and appended to mf->files if needed) through
+ * get_include_file_index().
+ */
+static uint32_t get_file_hash_index(struct manifest *mf,
+                                    char *path,
+                                    struct file_hash *file_hash,
+                                    struct hashtable *mf_files,
+                                    struct hashtable *mf_file_infos)
+{
+       struct file_info fi;
+       uint32_t *fi_index;
+       uint32_t n;
+
+       fi.index = get_include_file_index(mf, path, mf_files);
+       memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
+       fi.size = file_hash->size;
+
+       fi_index = hashtable_search(mf_file_infos, &fi);
+       if (fi_index) {
+               return *fi_index;
+       }
+
+       n = mf->n_file_infos;
+       mf->file_infos = x_realloc(mf->file_infos,
+                                  (n + 1) * sizeof(*mf->file_infos));
+       mf->n_file_infos++;
+       mf->file_infos[n] = fi;
+
+       return n;
+}
+
+/*
+ * Fill indexes[0..size-1] with one file_info index per entry of
+ * included_files (path --> struct file_hash), appending new paths and
+ * file_info records to mf as needed. size must equal the number of entries in
+ * included_files. Nothing is done when size is 0.
+ */
+static void
+add_file_info_indexes(uint32_t *indexes, uint32_t size,
+                      struct manifest *mf, struct hashtable *included_files)
+{
+       struct hashtable_itr *iter;
+       uint32_t i;
+       char *path;
+       struct file_hash *file_hash;
+       struct hashtable *mf_files; /* path --> index */
+       struct hashtable *mf_file_infos; /* struct file_info --> index */
+
+       if (size == 0) {
+               /* The do/while below reads the first entry unconditionally. */
+               return;
+       }
+
+       mf_files = create_string_index_map(mf->files, mf->n_files);
+       mf_file_infos = create_file_info_index_map(mf->file_infos,
+                                                  mf->n_file_infos);
+       iter = hashtable_iterator(included_files);
+       i = 0;
+       do {
+               path = hashtable_iterator_key(iter);
+               file_hash = hashtable_iterator_value(iter);
+               indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
+                                                mf_file_infos);
+               i++;
+       } while (hashtable_iterator_advance(iter));
+       assert(i == size);
+
+       /*
+        * hashtable_iterator() hands back a heap-allocated iterator that the
+        * caller owns (per the hashtable library's usage), so release it.
+        */
+       free(iter);
+       hashtable_destroy(mf_file_infos, 1);
+       hashtable_destroy(mf_files, 1);
+}
+
+/*
+ * Append a new object entry to mf: its hash and size are copied from
+ * object_hash and its file_info index list is built from included_files.
+ */
+static void add_object_entry(struct manifest *mf,
+                             struct file_hash *object_hash,
+                             struct hashtable *included_files)
+{
+       uint32_t slot = mf->n_objects;
+       uint32_t n_included;
+       struct object *entry;
+
+       mf->objects = x_realloc(mf->objects,
+                               (slot + 1) * sizeof(*mf->objects));
+       mf->n_objects++;
+       entry = &mf->objects[slot];
+
+       n_included = hashtable_count(included_files);
+       entry->n_file_info_indexes = n_included;
+       entry->file_info_indexes =
+               x_malloc(n_included * sizeof(*entry->file_info_indexes));
+       add_file_info_indexes(entry->file_info_indexes, n_included, mf,
+                             included_files);
+       entry->hash = *object_hash;
+}
+
+/*
+ * Try to get the object hash from a manifest file. The manifest is read under
+ * a read lock and its objects are checked newest first; the first object
+ * whose include files all still match is returned as a newly allocated
+ * struct file_hash. Caller frees. Returns NULL on failure or when no object
+ * matches.
+ */
+struct file_hash *manifest_get(const char *manifest_path)
+{
+       int fd;
+       FILE *f = NULL;
+       struct manifest *mf = NULL;
+       struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
+       uint32_t i;
+       struct file_hash *fh = NULL;
+
+       fd = open(manifest_path, O_RDONLY);
+       if (fd == -1) {
+               /* Cache miss. */
+               goto out;
+       }
+       if (read_lock_fd(fd) == -1) {
+               cc_log("Failed to read lock %s\n", manifest_path);
+               /* No stream owns the descriptor yet, so close it here. */
+               close(fd);
+               goto out;
+       }
+       f = fdopen(fd, "rb");
+       if (!f) {
+               cc_log("Failed to fdopen %s\n", manifest_path);
+               close(fd);
+               goto out;
+       }
+       mf = read_manifest(f);
+       if (!mf) {
+               cc_log("Error reading %s\n", manifest_path);
+               goto out;
+       }
+
+       hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
+
+       /* Check newest object first since it's a bit more likely to match. */
+       for (i = mf->n_objects; i > 0; i--) {
+               if (verify_object(mf, &mf->objects[i - 1], hashed_files)) {
+                       fh = x_malloc(sizeof(*fh));
+                       *fh = mf->objects[i - 1].hash;
+                       goto out;
+               }
+       }
+
+out:
+       if (hashed_files) {
+               hashtable_destroy(hashed_files, 1);
+       }
+       if (f) {
+               /* Also closes fd. */
+               fclose(f);
+       }
+       if (mf) {
+               free_manifest(mf);
+       }
+       return fh;
+}
+
+/*
+ * Put the object name into a manifest file given a set of included files.
+ * The existing manifest is read under a write lock, the new object entry is
+ * added, and the result is written to a temporary file that replaces the
+ * manifest through rename(). The temporary file is removed again when any
+ * step fails. Returns 1 on success, otherwise 0.
+ */
+int manifest_put(const char *manifest_path, struct file_hash *object_hash,
+                 struct hashtable *included_files)
+{
+       int ret = 0;
+       int fd1;
+       int fd2;
+       int close_result;
+       int tmp_file_opened = 0;
+       FILE *f1 = NULL;
+       FILE *f2 = NULL;
+       struct manifest *mf = NULL;
+       char *tmp_file = NULL;
+
+       fd1 = safe_open(manifest_path);
+       if (fd1 == -1) {
+               cc_log("Failed to open %s\n", manifest_path);
+               goto out;
+       }
+       if (write_lock_fd(fd1) == -1) {
+               cc_log("Failed to write lock %s\n", manifest_path);
+               close(fd1);
+               goto out;
+       }
+       f1 = fdopen(fd1, "rb");
+       if (!f1) {
+               cc_log("Failed to fdopen %s\n", manifest_path);
+               close(fd1);
+               goto out;
+       }
+       mf = read_manifest(f1);
+       if (!mf) {
+               cc_log("Failed to read %s\n", manifest_path);
+               goto out;
+       }
+
+       x_asprintf(&tmp_file, "%s/manifest.tmp.%s", temp_dir, tmp_string());
+
+       fd2 = safe_open(tmp_file);
+       if (fd2 == -1) {
+               cc_log("Failed to open %s\n", tmp_file);
+               goto out;
+       }
+       tmp_file_opened = 1;
+       f2 = fdopen(fd2, "wb");
+       if (!f2) {
+               cc_log("Failed to fdopen %s\n", tmp_file);
+               /* No stream owns the descriptor yet, so close it here. */
+               close(fd2);
+               goto out;
+       }
+
+       add_object_entry(mf, object_hash, included_files);
+       if (!write_manifest(f2, mf)) {
+               cc_log("Failed to write manifest %s\n", manifest_path);
+               goto out;
+       }
+
+       /*
+        * Flush and close the temporary file before it is renamed into
+        * place: buffered data may only hit the disk (and fail) at this point,
+        * and an incomplete file must never replace the manifest.
+        */
+       close_result = fclose(f2);
+       f2 = NULL;
+       if (close_result != 0) {
+               cc_log("Failed to write manifest %s\n", manifest_path);
+               goto out;
+       }
+
+       if (rename(tmp_file, manifest_path) != 0) {
+               cc_log("Failed to rename %s to %s\n",
+                      tmp_file, manifest_path);
+               goto out;
+       }
+       /* The temporary name no longer exists; nothing to clean up. */
+       tmp_file_opened = 0;
+       ret = 1;
+
+out:
+       if (mf) {
+               free_manifest(mf);
+       }
+       if (f2) {
+               fclose(f2);
+       }
+       if (tmp_file) {
+               if (tmp_file_opened) {
+                       /* Do not leave a stale temporary file behind. */
+                       unlink(tmp_file);
+               }
+               free(tmp_file);
+       }
+       if (f1) {
+               /* Closing the stream also closes fd1. */
+               fclose(f1);
+       }
+       return ret;
+}
diff --git a/manifest.h b/manifest.h

new file mode 100644 (file)

index 0000000..b80014e
--- /dev/null
+++ b/manifest.h
@@ -0,0 +1,17 @@
+#ifndef MANIFEST_H
+#define MANIFEST_H
+
+#include <inttypes.h>
+#include "hashtable.h"
+
+/* A 16-byte hash together with a 32-bit size. */
+struct file_hash
+{
+       uint8_t hash[16];
+       uint32_t size;
+};
+
+struct file_hash *manifest_get(const char *manifest_path);
+int manifest_put(const char *manifest_path, struct file_hash *object_hash,
+                 struct hashtable *included_files);
+
+#endif
diff --git a/murmurhashneutral2.c b/murmurhashneutral2.c

new file mode 100644 (file)

index 0000000..ade30b8
--- /dev/null
+++ b/murmurhashneutral2.c
@@ -0,0 +1,49 @@
+/*
+ * MurmurHashNeutral2, by Austin Appleby. Released to the public domain. See
+ * <http://murmurhash.googlepages.com>.
+ */
+
+#include "murmurhashneutral2.h"
+
+/*
+ * Compute the MurmurHashNeutral2 hash of the len bytes starting at key, mixed
+ * with seed. Input is consumed byte by byte (four bytes are assembled least
+ * significant byte first), so the bytes are never read as a wider type.
+ */
+unsigned int murmurhashneutral2(const void *key, int len, unsigned int seed)
+{
+       const unsigned int m = 0x5bd1e995;
+       const int r = 24;
+
+       unsigned int h = seed ^ len;
+
+       const unsigned char *data = (const unsigned char *)key;
+
+       while (len >= 4) {
+               unsigned int k;
+
+               /*
+                * Widen each byte to unsigned before shifting: an unsigned
+                * char promotes to int, and shifting a byte >= 0x80 left by
+                * 24 would overflow a signed int.
+                */
+               k  = data[0];
+               k |= (unsigned int)data[1] << 8;
+               k |= (unsigned int)data[2] << 16;
+               k |= (unsigned int)data[3] << 24;
+
+               k *= m;
+               k ^= k >> r;
+               k *= m;
+
+               h *= m;
+               h ^= k;
+
+               data += 4;
+               len -= 4;
+       }
+
+       /* Mix in the 1-3 trailing bytes, if any. */
+       switch (len) {
+       case 3:
+               h ^= (unsigned int)data[2] << 16;
+               /* fall through */
+       case 2:
+               h ^= (unsigned int)data[1] << 8;
+               /* fall through */
+       case 1:
+               h ^= data[0];
+               h *= m;
+               break;
+       default:
+               break;
+       }
+
+       h ^= h >> 13;
+       h *= m;
+       h ^= h >> 15;
+
+       return h;
+}
diff --git a/murmurhashneutral2.h b/murmurhashneutral2.h

new file mode 100644 (file)

index 0000000..fe056fd
--- /dev/null
+++ b/murmurhashneutral2.h
@@ -0,0 +1,6 @@
+#ifndef MURMURHASHNEUTRAL2_H
+#define MURMURHASHNEUTRAL2_H
+
+unsigned int murmurhashneutral2(const void *key, int len, unsigned int seed);
+
+#endif
diff --git a/stats.c b/stats.c

index d8317960dd647f9200c2a6e97f10ce384461242d..f6e479764426c84cf218dc151df52b49b1c01181 100644 (file)
--- a/stats.c
+++ b/stats.c
@@ -32,13 +32,15 @@ extern char *cache_dir;
  #define FLAG_NOZERO 1 /* don't zero with the -z option */
  #define FLAG_ALWAYS 2 /* always show, even if zero */
  
+/* statistics fields in display order */
  static struct {
         enum stats stat;
         char *message;
         void (*fn)(unsigned );
         unsigned flags;
  } stats_info[] = {
-       { STATS_CACHED,       "cache hit                      ", NULL, FLAG_ALWAYS },
+       { STATS_CACHEHIT_DIR, "cache hit (direct)             ", NULL, FLAG_ALWAYS },
+       { STATS_CACHEHIT_CPP, "cache hit (preprocessed)       ", NULL, FLAG_ALWAYS },
         { STATS_TOCACHE,      "cache miss                     ", NULL, FLAG_ALWAYS },
         { STATS_LINK,         "called for link                ", NULL, 0 },
         { STATS_MULTIPLE,     "multiple source files          ", NULL, 0 },
@@ -137,7 +139,7 @@ static void stats_update_size(enum stats stat, size_t size)
  
         memset(counters, 0, sizeof(counters));
  
-       if (lock_fd(fd) != 0) return;
+       if (write_lock_fd(fd) != 0) return;
  
         /* read in the old stats */
         stats_read_fd(fd, counters);
@@ -197,7 +199,7 @@ void stats_read(const char *stats_file, unsigned counters[STATS_END])
                 stats_default(counters);
                 return;
         }
-       lock_fd(fd);
+       write_lock_fd(fd);
         stats_read_fd(fd, counters);
         close(fd);
  }
@@ -271,7 +273,7 @@ void stats_zero(void)
                         continue;
                 }
                 memset(counters, 0, sizeof(counters));
-               lock_fd(fd);
+               write_lock_fd(fd);
                 stats_read_fd(fd, counters);
                 for (i=0;stats_info[i].message;i++) {
                         if (!(stats_info[i].flags & FLAG_NOZERO)) {
@@ -317,7 +319,7 @@ int stats_set_limits(long maxfiles, long maxsize)
                 memset(counters, 0, sizeof(counters));
                 fd = safe_open(fname);
                 if (fd != -1) {
-                       lock_fd(fd);
+                       write_lock_fd(fd);
                         stats_read_fd(fd, counters);
                         if (maxfiles != -1) {
                                 counters[STATS_MAXFILES] = maxfiles;
@@ -348,7 +350,7 @@ void stats_set_sizes(const char *dir, size_t num_files, size_t total_size)
  
         fd = safe_open(stats_file);
         if (fd != -1) {
-               lock_fd(fd);
+               write_lock_fd(fd);
                 stats_read_fd(fd, counters);
                 counters[STATS_NUMFILES] = num_files;
                 counters[STATS_TOTALSIZE] = total_size;
diff --git a/test.sh b/test.sh

index 42f6c59cb4db1ce74b501e7141cec9fd96eee496..23d73007f8e0c139059454be7849aa2e88ae64e4 100755 (executable)
--- a/test.sh
+++ b/test.sh
@@ -10,11 +10,10 @@ else
  fi
  
  CCACHE=../ccache
-TESTDIR=test.$$
+TESTDIR=testdir.$$
  
  unset CCACHE_DISABLE
  
-
  test_failed() {
      reason="$1"
      echo $1
@@ -49,15 +48,23 @@ checkstat() {
      expected_value="$2"
      value=`getstat "$stat"`
      if [ "$expected_value" != "$value" ]; then
-        test_failed "SUITE: $testsuite TEST: $testname - Expected $stat to be $expected_value got $value"
+        test_failed "SUITE: $testsuite, TEST: $testname - Expected $stat to be $expected_value, got $value"
      fi
  }
  
+# checkfile FILE EXPECTED
+# Report a test failure unless FILE is a regular file whose content, as
+# captured by command substitution, equals EXPECTED.
+checkfile() {
+    if [ ! -f "$1" ]; then
+        test_failed "SUITE: $testsuite, TEST: $testname - $1 not found"
+    fi
+    # Capture once and quote, so that empty or multi-word content cannot
+    # break the test expression.
+    actual=`cat "$1"`
+    if [ "$actual" != "$2" ]; then
+        test_failed "SUITE: $testsuite, TEST: $testname - Bad content of $1.\nExpected: $2\nActual: $actual"
+    fi
+}
  
  basetests() {
      echo "starting testsuite $testsuite"
      rm -rf .ccache
-    checkstat 'cache hit' 0
+    checkstat 'cache hit (preprocessed)' 0
      checkstat 'cache miss' 0
  
      j=1
@@ -69,27 +76,27 @@ basetests() {
  
      testname="BASIC"
      $CCACHE_COMPILE -c test1.c
-    checkstat 'cache hit' 0
+    checkstat 'cache hit (preprocessed)' 0
      checkstat 'cache miss' 1
  
      testname="BASIC2"
      $CCACHE_COMPILE -c test1.c
-    checkstat 'cache hit' 1
+    checkstat 'cache hit (preprocessed)' 1
      checkstat 'cache miss' 1
  
      testname="debug"
      $CCACHE_COMPILE -c test1.c -g
-    checkstat 'cache hit' 1
+    checkstat 'cache hit (preprocessed)' 1
      checkstat 'cache miss' 2
  
      testname="debug2"
      $CCACHE_COMPILE -c test1.c -g
-    checkstat 'cache hit' 2
+    checkstat 'cache hit (preprocessed)' 2
      checkstat 'cache miss' 2
  
      testname="output"
      $CCACHE_COMPILE -c test1.c -o foo.o
-    checkstat 'cache hit' 3
+    checkstat 'cache hit (preprocessed)' 3
      checkstat 'cache miss' 2
  
      testname="link"
@@ -134,27 +141,27 @@ basetests() {
  
      testname="CCACHE_DISABLE"
      CCACHE_DISABLE=1 $CCACHE_COMPILE -c test1.c 2> /dev/null
-    checkstat 'cache hit' 3
+    checkstat 'cache hit (preprocessed)' 3
      $CCACHE_COMPILE -c test1.c
-    checkstat 'cache hit' 4
+    checkstat 'cache hit (preprocessed)' 4
  
      testname="CCACHE_CPP2"
      CCACHE_CPP2=1 $CCACHE_COMPILE -c test1.c -O -O
-    checkstat 'cache hit' 4
+    checkstat 'cache hit (preprocessed)' 4
      checkstat 'cache miss' 3
  
      CCACHE_CPP2=1 $CCACHE_COMPILE -c test1.c -O -O
-    checkstat 'cache hit' 5
+    checkstat 'cache hit (preprocessed)' 5
      checkstat 'cache miss' 3
  
      testname="CCACHE_NOSTATS"
      CCACHE_NOSTATS=1 $CCACHE_COMPILE -c test1.c -O -O
-    checkstat 'cache hit' 5
+    checkstat 'cache hit (preprocessed)' 5
      checkstat 'cache miss' 3
  
      testname="CCACHE_RECACHE"
      CCACHE_RECACHE=1 $CCACHE_COMPILE -c test1.c -O -O
-    checkstat 'cache hit' 5
+    checkstat 'cache hit (preprocessed)' 5
      checkstat 'cache miss' 4
  
      # strictly speaking should be 6 - RECACHE causes a double counting!
@@ -165,11 +172,11 @@ basetests() {
  
      testname="CCACHE_HASHDIR"
      CCACHE_HASHDIR=1 $CCACHE_COMPILE -c test1.c -O -O
-    checkstat 'cache hit' 5
+    checkstat 'cache hit (preprocessed)' 5
      checkstat 'cache miss' 5
  
      CCACHE_HASHDIR=1 $CCACHE_COMPILE -c test1.c -O -O
-    checkstat 'cache hit' 6
+    checkstat 'cache hit (preprocessed)' 6
      checkstat 'cache miss' 5
  
      checkstat 'files in cache' 8
@@ -179,26 +186,26 @@ basetests() {
      cat test1.c >> test1-comment.c
      $CCACHE_COMPILE -c test1-comment.c
      rm -f test1-comment*
-    checkstat 'cache hit' 6
+    checkstat 'cache hit (preprocessed)' 6
      checkstat 'cache miss' 6
  
      testname="CCACHE_UNIFY"
      CCACHE_UNIFY=1 $CCACHE_COMPILE -c test1.c
-    checkstat 'cache hit' 6
+    checkstat 'cache hit (preprocessed)' 6
      checkstat 'cache miss' 7
      mv test1.c test1-saved.c
      echo '/* another comment */' > test1.c
      cat test1-saved.c >> test1.c
      CCACHE_UNIFY=1 $CCACHE_COMPILE -c test1.c
      mv test1-saved.c test1.c
-    checkstat 'cache hit' 7
+    checkstat 'cache hit (preprocessed)' 7
      checkstat 'cache miss' 7
  
      testname="cache-size"
      for f in *.c; do
          $CCACHE_COMPILE -c $f
      done
-    checkstat 'cache hit' 8
+    checkstat 'cache hit (preprocessed)' 8
      checkstat 'cache miss' 37
      checkstat 'files in cache' 72
      $CCACHE -F 48 -c > /dev/null
@@ -208,20 +215,20 @@ basetests() {
  
      testname="cpp call"
      $CCACHE_COMPILE -c test1.c -E > test1.i
-    checkstat 'cache hit' 8
+    checkstat 'cache hit (preprocessed)' 8
      checkstat 'cache miss' 37
  
      testname="direct .i compile"
      $CCACHE_COMPILE -c test1.c
-    checkstat 'cache hit' 8
+    checkstat 'cache hit (preprocessed)' 8
      checkstat 'cache miss' 38
  
      $CCACHE_COMPILE -c test1.i
-    checkstat 'cache hit' 9
+    checkstat 'cache hit (preprocessed)' 9
      checkstat 'cache miss' 38
  
      $CCACHE_COMPILE -c test1.i
-    checkstat 'cache hit' 10
+    checkstat 'cache hit (preprocessed)' 10
      checkstat 'cache miss' 38
  
      # removed these tests as some compilers (including newer versions of gcc)
@@ -230,16 +237,16 @@ basetests() {
  #     testname="direct .ii file"
  #     mv test1.i test1.ii
  #     $CCACHE_COMPILE -c test1.ii
-#     checkstat 'cache hit' 10
+#     checkstat 'cache hit (preprocessed)' 10
  #     checkstat 'cache miss' 39
  
  #     $CCACHE_COMPILE -c test1.ii
-#     checkstat 'cache hit' 11
+#     checkstat 'cache hit (preprocessed)' 11
  #     checkstat 'cache miss' 39
  
      testname="zero-stats"
      $CCACHE -z > /dev/null
-    checkstat 'cache hit' 0
+    checkstat 'cache hit (preprocessed)' 0
      checkstat 'cache miss' 0
  
      testname="clear"
@@ -250,6 +257,102 @@ basetests() {
      rm -f test1.c
  }
  
+# Test suite for the direct mode: runs with CCACHE_NODIRECT unset and checks
+# the direct hit, preprocessed hit and miss counters after each compilation.
+direct_tests() {
+    echo "starting testsuite $testsuite"
+    rm -rf .ccache
+    unset CCACHE_NODIRECT
+
+    ##################################################################
+    # Create some code to compile.
+    cat <<EOF >test.c
+/* test.c */
+#include "test1.h"
+#include "test2.h"
+EOF
+    cat <<EOF >test1.h
+#include "test3.h"
+int test1;
+EOF
+    cat <<EOF >test2.h
+int test2;
+EOF
+    cat <<EOF >test3.h
+int test3;
+EOF
+
+    sleep 1 # Sleep to make the include files trusted.
+
+    ##################################################################
+    # First compilation is a miss.
+    testname="first compilation"
+    $CCACHE -z >/dev/null
+    $CCACHE $COMPILER -c test.c
+    checkstat 'cache hit (direct)' 0
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 1
+
+    ##################################################################
+    # Another compilation should now generate a direct hit.
+    testname="direct hit"
+    $CCACHE -z >/dev/null
+    $CCACHE $COMPILER -c test.c
+    checkstat 'cache hit (direct)' 1
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 0
+
+    ##################################################################
+    # Compiling with CCACHE_NODIRECT set should generate a preprocessed hit.
+    testname="preprocessed hit"
+    $CCACHE -z >/dev/null
+    CCACHE_NODIRECT=1 $CCACHE $COMPILER -c test.c
+    checkstat 'cache hit (direct)' 0
+    checkstat 'cache hit (preprocessed)' 1
+    checkstat 'cache miss' 0
+
+    ##################################################################
+    # Test compilation of a modified include file.
+    testname="modified include file"
+    $CCACHE -z >/dev/null
+    echo "int test3_2;" >>test3.h
+    sleep 1 # Sleep to make the include file trusted.
+    $CCACHE $COMPILER -c test.c
+    checkstat 'cache hit (direct)' 0
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 1
+
+    $CCACHE $COMPILER -c test.c
+    checkstat 'cache hit (direct)' 1
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 1
+
+    ##################################################################
+    # A removed but previously compiled header file should be handled
+    # gracefully.
+    testname="missing header file"
+    $CCACHE -z >/dev/null
+
+    cat <<EOF >test1.h
+/* No more include of test3.h */
+int test1;
+EOF
+    sleep 1 # Sleep to make the include file trusted.
+    rm -f test3.h
+
+    $CCACHE $COMPILER -c test.c
+    checkstat 'cache hit (direct)' 0
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 1
+
+    $CCACHE $COMPILER -c test.c
+    checkstat 'cache hit (direct)' 1
+    checkstat 'cache hit (preprocessed)' 0
+    checkstat 'cache miss' 1
+
+    ##################################################################
+    # Reset CCACHE_NODIRECT again. The unset at the top of this function
+    # also removed the export attribute, so export explicitly to make the
+    # setting visible to child processes.
+    CCACHE_NODIRECT=1
+    export CCACHE_NODIRECT
+}
+
  ######
  # main program
  rm -rf $TESTDIR
@@ -258,6 +361,8 @@ cd $TESTDIR || exit 1
  mkdir .ccache
  CCACHE_DIR=.ccache
  export CCACHE_DIR
+CCACHE_NODIRECT=1
+export CCACHE_NODIRECT
  
  testsuite="base"
  CCACHE_COMPILE="$CCACHE $COMPILER"
@@ -296,6 +401,9 @@ export CCACHE_NLEVELS
  basetests
  unset CCACHE_NLEVELS
  
+testsuite="direct"
+direct_tests
+
  cd ..
  rm -rf $TESTDIR
  echo test done - OK
diff --git a/util.c b/util.c

index a15492bcb6114681d0f055f819ddd1f514b8e50b..c5199d16eacd48e38d0da7643a2ca1d094da343a 100644 (file)
--- a/util.c
+++ b/util.c
@@ -344,6 +344,29 @@ int create_dir(const char *dir)
         return 0;
  }
  
+/*
+ * Return a "<hostname>.<pid>" string used to distinguish temporary files.
+ * The string is built on the first call and the same pointer is returned on
+ * later calls. The local hostname is included to try to cope with NFS.
+ */
+const char *tmp_string(void)
+{
+       static char *cached;
+       char hostname[200];
+
+       /* Already built by an earlier call. */
+       if (cached) {
+               return cached;
+       }
+
+       /* Default used when gethostname() is not available. */
+       strcpy(hostname, "unknown");
+#if HAVE_GETHOSTNAME
+       gethostname(hostname, sizeof(hostname)-1);
+#endif
+       /* Force termination regardless of what gethostname() wrote. */
+       hostname[sizeof(hostname)-1] = 0;
+
+       if (asprintf(&cached, "%s.%u", hostname, (unsigned)getpid()) == -1) {
+               fatal("could not allocate tmp_string\n");
+       }
+       return cached;
+}
+
  char const CACHEDIR_TAG[] =
         "Signature: 8a477f597d28d172789f06886806bc55\n"
         "# This file is a cache directory tag created by ccache.\n"
@@ -408,6 +431,19 @@ char *x_strdup(const char *s)
         return ret;
  }
  
+/*
+  like strndup(), but calls fatal() instead of returning NULL when the
+  allocation fails
+*/
+char *x_strndup(const char *s, size_t n)
+{
+       char *copy = strndup(s, n);
+
+       if (copy == NULL) {
+               fatal("Out of memory in strndup\n");
+       }
+       return copy;
+}
+
  /*
    this is like malloc() but dies if the malloc fails
  */
@@ -501,12 +537,12 @@ char *dirname(char *s)
         return s;
  }
  
-int lock_fd(int fd)
+static int lock_fd(int fd, short type)
  {
         struct flock fl;
         int ret;
  
-       fl.l_type = F_WRLCK;
+       fl.l_type = type;
         fl.l_whence = SEEK_SET;
         fl.l_start = 0;
         fl.l_len = 1;
@@ -520,6 +556,16 @@ int lock_fd(int fd)
         return ret;
  }
  
+/* Request a read lock (F_RDLCK) on fd via lock_fd() and return its result. */
+int read_lock_fd(int fd)
+{
+       const short lock_type = F_RDLCK;
+
+       return lock_fd(fd, lock_type);
+}
+
+/* Request a write lock (F_WRLCK) on fd via lock_fd() and return its result. */
+int write_lock_fd(int fd)
+{
+       const short lock_type = F_WRLCK;
+
+       return lock_fd(fd, lock_type);
+}
+
  /* return size on disk of a file */
  size_t file_size(struct stat *st)
  {
@@ -688,4 +734,3 @@ const char *get_home_directory(void)
         cc_log("Unable to determine home directory");
         return NULL;
  }
-
author	Joel Rosdahl <joel@rosdahl.net>
	Sat, 14 Nov 2009 15:14:55 +0000 (16:14 +0100)
committer	Joel Rosdahl <joel@rosdahl.net>
	Tue, 5 Jan 2010 17:53:01 +0000 (18:53 +0100)
Makefile.in		patch \| blob \| blame \| history
NEWS		patch \| blob \| blame \| history
ccache.c		patch \| blob \| blame \| history
ccache.h		patch \| blob \| blame \| history
ccache.yo		patch \| blob \| blame \| history
dump-manifest	[new file with mode: 0755]	patch \| blob
hash.c		patch \| blob \| blame \| history
hashtable.c	[new file with mode: 0644]	patch \| blob
hashtable.h	[new file with mode: 0644]	patch \| blob
hashtable_itr.c	[new file with mode: 0644]	patch \| blob
hashtable_itr.h	[new file with mode: 0644]	patch \| blob
hashtable_private.h	[new file with mode: 0644]	patch \| blob
hashutil.c	[new file with mode: 0644]	patch \| blob
hashutil.h	[new file with mode: 0644]	patch \| blob
manifest.c	[new file with mode: 0644]	patch \| blob
manifest.h	[new file with mode: 0644]	patch \| blob
murmurhashneutral2.c	[new file with mode: 0644]	patch \| blob
murmurhashneutral2.h	[new file with mode: 0644]	patch \| blob
stats.c		patch \| blob \| blame \| history
test.sh		patch \| blob \| blame \| history
util.c		patch \| blob \| blame \| history