]> git.ipfire.org Git - thirdparty/ccache.git/commitdiff
Reimplement the hard link mode
author: Joel Rosdahl <joel@rosdahl.net>
Fri, 5 Jul 2019 19:43:07 +0000 (21:43 +0200)
committer: Joel Rosdahl <joel@rosdahl.net>
Mon, 15 Jul 2019 09:31:50 +0000 (11:31 +0200)
- Files stored by hard linking are saved as _N.raw files next to their
  .result file, where N is the 0-based index of the entry in the .result
  file.
- The .result file stores expected file sizes for the .raw files and the
  code verifies that they are correct before retrieving the files from
  the cache.
- The manual has been updated to mention the new file size check and
  also some other caveats.

The previous hard link mode implementation was removed in
3999a9ca67e2eabb8457920108b546713f200161 and
a1dbb8a15e034e5a63eb6b4407c9e3ccac469162. The latter commit lists three
points:

1. Hard links are error prone.
2. Compression will make hard links obsolete as a means of saving cache
   space.
3. A future backend storage API will be easier to write.

Point 1 is still true, but since the result file now stores expected
file sizes, many inadvertent modifications of files will be detected.

Point 2 is also still true, but you might want to trade cache size for
speed in cases where increased speed actually is measurable, like with
very large object files.

Point 3 does not quite hold after thinking some more about future APIs.
I think that it will be relatively straight-forward to add operations
like supports_raw_files, get_raw_file and put_raw_file to the API.

14 files changed:
doc/MANUAL.adoc
misc/performance
src/ccache.c
src/ccache.h
src/common_header.c
src/conf.c
src/conf.h
src/confitems.gperf
src/envtoconfitems.gperf
src/result.c
src/util.c
test/run
test/suites/hardlink.bash [new file with mode: 0644]
unittest/test_conf.c

index 2755275c769e852b40ddf888398974cdb4377e14..1f8a9f544c1f6f07777955b68c2abb9e7572f9c6 100644 (file)
@@ -45,6 +45,7 @@ Features
 * Low overhead.
 * Compresses data in the cache to save disk space.
 * Checksums data in the cache to detect corruption.
+* Optionally uses hard links to avoid copies (there are caveats, though).
 
 
 Limitations
@@ -91,8 +92,7 @@ but currently doesn't interact well with other tools that do the same thing.
 See <<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER
 COMPILER WRAPPERS>>.
 
-WARNING: Do not use a hard link, use a symbolic link. A hard link will cause
-``interesting'' problems.
+WARNING: Use symbolic links for masquerading, not hard links.
 
 Options
 -------
@@ -364,6 +364,9 @@ Compression is done using the Zstandard algorithm. The algorithm is fast enough
 that there should be little reason to turn off compression to gain performance.
 One exception is if the cache is located on a compressed file system, in which
 case the compression performed by ccache of course is redundant.
++
+Compression will be disabled if hard linking (the *hard_link* setting) is
+enabled.
 
 *compression_level* (*CCACHE_COMPRESSLEVEL*)::
 
@@ -428,6 +431,37 @@ Semantics of *compression_level*:
     the hash sum that identifies the build. The list separator is semicolon on
     Windows systems and colon on other systems.
 
+*hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see <<_boolean_values,Boolean values>> above)::
+
+    If true, ccache will attempt to use hard links to store compiler output
+    files in the cache, and similarly use hard links when retrieving files from
+    the cache. The default is false.
++
+An exception is dependency files (`.d`) which are never stored as hard links.
++
+Files stored via hard links cannot be compressed, so the cache size will likely
+be significantly larger if this option is enabled. However, performance may be
+improved depending on the use case.
++
+WARNING: Do not enable this option unless you are aware of these caveats:
++
+* If the resulting file is modified, the file in the cache will also be
+  modified since they share content, which corrupts the cache entry. As of
+  version 3.8, ccache performs a simple integrity check for cached files by
+  verifying that their sizes are correct. This means that mistakes like `strip
+  file.o` or `echo >file.o` will be detected, but a modification that doesn't
+  change the file size will not.
+* Programs that don't expect files from two separate but identical
+  compilations to be hard links to each other can fail.
+* Programs that rely on modification times (like ``make'') can be confused if
+  several users (or one user with several build trees) use the same cache
+  directory. The reason for this is that the object files share i-nodes and
+  therefore modification times. If *file.o* is in build tree A (hard-linked
+  from the cache) and *file.o* then is produced by ccache in build tree B by
+  hard-linking from the cache, the modification timestamp will be updated for
+  *file.o* in build tree A as well. This can retrigger relinking in build tree
+  A even though nothing really has changed.
+
 *hash_dir* (*CCACHE_HASHDIR* or *CCACHE_NOHASHDIR*, see <<_boolean_values,Boolean values>> above)::
 
     If true (which is the default), ccache will include the current working
@@ -1093,6 +1127,8 @@ directory. To share a cache without unpleasant side effects, the following
 conditions should be met:
 
 * Use the same cache directory.
+* Make sure that the configuration setting *hard_link* is false (which is the
+  default).
 * Make sure that all users are in the same group.
 * Set the configuration setting *umask* to 002. This ensures that cached files
   are accessible to everyone in the group.
@@ -1108,6 +1144,13 @@ find $CCACHE_DIR -type d | xargs chmod g+s
 ----
 --
 
+The reason to avoid the hard link mode is that the hard links cause unwanted
+side effects, as all links to a cached file share the file's modification
+timestamp. This results in false dependencies being triggered by
+timestamp-based build systems whenever another user links to an existing file.
+Typically, users will see that their libraries and binaries are relinked
+without reason.
+
 You may also want to make sure that a base directory is set appropriately, as
 discussed in a previous section.
 
index 8e38b7a16bfd37be1bd89bef7352787204bc5d6f..5ab04e2261396959404bba54b0e876cbb2b2f808 100755 (executable)
@@ -101,6 +101,8 @@ def test(tmp_dir, options, compiler_args, source_file):
         environment["CCACHE_NOCOMPRESS"] = "1"
     if options.compression_level:
         environment["CCACHE_COMPRESSLEVEL"] = str(options.compression_level)
+    if options.hardlink:
+        environment["CCACHE_HARDLINK"] = "1"
     if options.nostats:
         environment["CCACHE_NOSTATS"] = "1"
 
@@ -266,6 +268,7 @@ def main(argv):
             " files (default: %s)" % DEFAULT_DIRECTORY
         ),
     )
+    op.add_option("--hardlink", help="use hard links", action="store_true")
     op.add_option(
         "--hit-factor",
         help=(
@@ -322,6 +325,7 @@ def main(argv):
         print("Compilercheck:", options.compilercheck)
         print("Compression:", on_off(not options.no_compression))
         print("Compression level:", options.compression_level or "default")
+        print("Hardlink:", on_off(options.hardlink))
         print("Nostats:", on_off(options.nostats))
 
     tmp_dir = "%s/perfdir.%d" % (abspath(options.directory), getpid())
index 9189b6e8367c31cbdf36ee53671125f0ea1190b8..8f00df7f49b177e9c53ec2dd3b4747bebacfdfae 100644 (file)
@@ -141,7 +141,7 @@ static struct digest *cached_result_name;
 
 // Full path to the file containing the result
 // (cachedir/a/b/cdef[...]-size.result).
-static char *cached_result;
+static char *cached_result_path;
 
 // Full path to the file containing the manifest
 // (cachedir/a/b/cdef[...]-size.manifest).
@@ -556,24 +556,6 @@ get_current_working_dir(void)
        return current_working_dir;
 }
 
-// Transform a name to a full path into the cache directory, creating needed
-// sublevels if needed. Caller frees.
-static char *
-get_path_in_cache(const char *name, const char *suffix)
-{
-       char *path = x_strdup(conf->cache_dir);
-       for (unsigned i = 0; i < conf->cache_dir_levels; ++i) {
-               char *p = format("%s/%c", path, name[i]);
-               free(path);
-               path = p;
-       }
-
-       char *result =
-               format("%s/%s%s", path, name + conf->cache_dir_levels, suffix);
-       free(path);
-       return result;
-}
-
 // This function hashes an include file and stores the path and hash in the
 // global included_files variable. If the include file is a PCH, cpp_hash is
 // also updated. Takes over ownership of path.
@@ -1187,8 +1169,8 @@ update_manifest_file(void)
        }
 
        MTR_BEGIN("manifest", "manifest_put");
+       cc_log("Adding result name to %s", manifest_path);
        if (manifest_put(manifest_path, cached_result_name, included_files)) {
-               cc_log("Added result name to %s", manifest_path);
                if (x_stat(manifest_path, &st) == 0) {
                        stats_update_size(
                                manifest_stats_file,
@@ -1207,7 +1189,7 @@ update_cached_result_globals(struct digest *result_name)
        char result_name_string[DIGEST_STRING_BUFFER_SIZE];
        digest_as_string(result_name, result_name_string);
        cached_result_name = result_name;
-       cached_result = get_path_in_cache(result_name_string, ".result");
+       cached_result_path = get_path_in_cache(result_name_string, ".result");
        stats_file = format("%s/%c/stats", conf->cache_dir, result_name_string[0]);
 }
 
@@ -1218,6 +1200,13 @@ to_cache(struct args *args, struct hash *depend_mode_hash)
        args_add(args, "-o");
        args_add(args, output_obj);
 
+       if (conf->hard_link) {
+               // Workaround for Clang bug where it overwrites an existing object file
+               // when it's compiling an assembler file, see
+               // <https://bugs.llvm.org/show_bug.cgi?id=39782>.
+               x_unlink(output_obj);
+       }
+
        if (generating_diagnostics) {
                args_add(args, "--serialize-diagnostics");
                args_add(args, output_dia);
@@ -1412,13 +1401,13 @@ to_cache(struct args *args, struct hash *depend_mode_hash)
                result_files_add(result_files, output_dwo, ".dwo");
        }
        struct stat orig_dest_st;
-       bool orig_dest_existed = stat(cached_result, &orig_dest_st) == 0;
-       result_put(cached_result, result_files);
+       bool orig_dest_existed = stat(cached_result_path, &orig_dest_st) == 0;
+       result_put(cached_result_path, result_files);
        result_files_free(result_files);
 
-       cc_log("Stored in cache: %s", cached_result);
+       cc_log("Stored in cache: %s", cached_result_path);
 
-       if (x_stat(cached_result, &st) != 0) {
+       if (x_stat(cached_result_path, &st) != 0) {
                stats_update(STATS_ERROR);
                failed();
        }
@@ -2053,11 +2042,11 @@ calculate_result_name(struct args *args, struct hash *hash, int direct_mode)
                        return NULL;
                }
 
-               char manifest_name[DIGEST_STRING_BUFFER_SIZE];
-               hash_result_as_string(hash, manifest_name);
-               manifest_path = get_path_in_cache(manifest_name, ".manifest");
+               char manifest_name_string[DIGEST_STRING_BUFFER_SIZE];
+               hash_result_as_string(hash, manifest_name_string);
+               manifest_path = get_path_in_cache(manifest_name_string, ".manifest");
                manifest_stats_file =
-                       format("%s/%c/stats", conf->cache_dir, manifest_name[0]);
+                       format("%s/%c/stats", conf->cache_dir, manifest_name_string[0]);
 
                cc_log("Looking for result name in %s", manifest_path);
                MTR_BEGIN("manifest", "manifest_get");
@@ -2156,7 +2145,7 @@ from_cache(enum fromcache_call_mode mode, bool put_result_in_manifest)
        if (generating_diagnostics) {
                result_files_add(result_files, output_dia, ".dia");
        }
-       bool ok = result_get(cached_result, result_files);
+       bool ok = result_get(cached_result_path, result_files);
        result_files_free(result_files);
        if (!ok) {
                cc_log("Failed to get result from cache");
@@ -3567,7 +3556,7 @@ cc_reset(void)
        free(output_dia); output_dia = NULL;
        free(output_dwo); output_dwo = NULL;
        free(cached_result_name); cached_result_name = NULL;
-       free(cached_result); cached_result = NULL;
+       free(cached_result_path); cached_result_path = NULL;
        free(manifest_path); manifest_path = NULL;
        time_of_compilation = 0;
        for (size_t i = 0; i < ignore_headers_len; i++) {
index 6ec96119fa896c1e901ed136895d296538c9a1c3..860f5ea8c018d39aab76a1795750bd5c0dde87d7 100644 (file)
@@ -149,8 +149,9 @@ void cc_dump_debug_log_buffer(const char *path);
 void fatal(const char *format, ...) ATTR_FORMAT(printf, 1, 2) ATTR_NORETURN;
 void warn(const char *format, ...) ATTR_FORMAT(printf, 1, 2);
 
+char *get_path_in_cache(const char *name, const char *suffix);
 bool copy_fd(int fd_in, int fd_out);
-bool copy_file(const char *src, const char *dest);
+bool copy_file(const char *src, const char *dest, bool via_tmp_file);
 bool move_file(const char *src, const char *dest);
 int create_dir(const char *dir);
 int create_parent_dirs(const char *path);
index c2f1e97a2001bbf9b803624b0beb9c3c590d7cb0..e5908944ef917925d02de062d16a359177850170 100644 (file)
@@ -109,12 +109,14 @@ bool common_header_initialize_for_reading(
                // consistency check for the non-compressed case. (A real checksum is used
                // for compressed data.)
                struct stat st;
-               if (x_fstat(fileno(input), &st) != 0
-                   || (uint64_t)st.st_size != header->content_size) {
+               if (x_fstat(fileno(input), &st) != 0) {
+                       return false;
+               }
+               if ((uint64_t)st.st_size != header->content_size) {
                        *errmsg = format(
-                               "Bad uncompressed file size (actual %lu bytes, expected %lu bytes)",
-                               (unsigned long)st.st_size,
-                               (unsigned long)header->content_size);
+                               "Bad uncompressed file size (actual %llu bytes, expected %llu bytes)",
+                               (unsigned long long)st.st_size,
+                               (unsigned long long)header->content_size);
                        return false;
                }
        }
index 833d24c711e943fae291f52239c3675341ba32f3..a7cbc1c933161c44302d9c43dea8a086a5001350 100644 (file)
@@ -140,6 +140,7 @@ conf_create(void)
        conf->direct_mode = true;
        conf->disable = false;
        conf->extra_files_to_hash = x_strdup("");
+       conf->hard_link = false;
        conf->hash_dir = true;
        conf->ignore_headers_in_manifest = x_strdup("");
        conf->keep_comments_cpp = false;
@@ -408,6 +409,7 @@ conf_print_items(struct conf *conf,
        ok &= print_item(conf, "direct_mode", printer, context);
        ok &= print_item(conf, "disable", printer, context);
        ok &= print_item(conf, "extra_files_to_hash", printer, context);
+       ok &= print_item(conf, "hard_link", printer, context);
        ok &= print_item(conf, "hash_dir", printer, context);
        ok &= print_item(conf, "ignore_headers_in_manifest", printer, context);
        ok &= print_item(conf, "keep_comments_cpp", printer, context);
index 7d14cfa6a4ffaffe7f324471ee798c463fb65e1c..07d8885722cb3a059cd6362f98582de2f97761a3 100644 (file)
@@ -17,6 +17,7 @@ struct conf {
        bool direct_mode;
        bool disable;
        char *extra_files_to_hash;
+       bool hard_link;
        bool hash_dir;
        char *ignore_headers_in_manifest;
        bool keep_comments_cpp;
index 05084375b0bbbabd2e88b32c588b4a28b0fd5f96..b0906ddcb10d6d2873721546a8e904637372af39 100644 (file)
@@ -33,6 +33,7 @@ depend_mode,                ITEM(depend_mode, bool)
 direct_mode,                ITEM(direct_mode, bool)
 disable,                    ITEM(disable, bool)
 extra_files_to_hash,        ITEM(extra_files_to_hash, env_string)
+hard_link,                  ITEM(hard_link, bool)
 hash_dir,                   ITEM(hash_dir, bool)
 ignore_headers_in_manifest, ITEM(ignore_headers_in_manifest, env_string)
 keep_comments_cpp,          ITEM(keep_comments_cpp, bool)
index 086cae6818823f68b3bd83637a1d6722d02bb1fd..374d19c5d3f16653b9bdbb799d594dda6a19f959 100644 (file)
@@ -26,6 +26,7 @@ DIRECT, "direct_mode"
 DISABLE, "disable"
 EXTENSION, "cpp_extension"
 EXTRAFILES, "extra_files_to_hash"
+HARDLINK, "hard_link"
 HASHDIR, "hash_dir"
 IGNOREHEADERS, "ignore_headers_in_manifest"
 LIMIT_MULTIPLE, "limit_multiple"
index b0b0bc4c3445c45e98a1ce7b7d81475354bc3366..81e18ef7eeb8766d882ee537d9f6d0c6c1fe9dbe 100644 (file)
@@ -18,6 +18,7 @@
 #include "common_header.h"
 #include "int_bytes_conversion.h"
 #include "compression.h"
+#include "hash.h"
 #include "result.h"
 
 // Result data format
 // <suffix>               ::= suffix_len bytes
 // <data_len>             ::= uint64_t
 // <data>                 ::= data_len bytes
-// <raw_file_entry>       ::= <raw_file_marker> <key_len> <key>
+// <raw_file_entry>       ::= <raw_file_marker> <suffix_len> <suffix> <file_len>
 // <raw_file_marker>      ::= 1 (uint8_t)
-// <key_len>              ::= uint8_t
-// <key>                  ::= key_len bytes
+// <file_len>             ::= uint64_t
 // <epilogue>             ::= <checksum>
 // <checksum>             ::= uint64_t ; XXH64 of content bytes
 //
@@ -79,6 +79,9 @@
 //
 // 1: Introduced in ccache 3.8.
 
+extern const struct conf *conf;
+extern char *stats_file;
+
 const char RESULT_MAGIC[4] = "cCrS";
 
 enum {
@@ -101,6 +104,21 @@ struct result_files {
        uint64_t *sizes;
 };
 
+typedef bool (*read_entry_fn)(
+       struct decompressor *decompressor,
+       struct decompr_state *decompr_state,
+       const char *result_path_in_cache,
+       uint32_t entry_number,
+       const struct result_files *list,
+       FILE *dump_stream);
+
+typedef bool (*write_entry_fn)(
+       struct compressor *compressor,
+       struct compr_state *compr_state,
+       const char *result_path_in_cache,
+       uint32_t entry_number,
+       const struct result_file *file);
+
 struct result_files *
 result_files_init(void)
 {
@@ -166,10 +184,13 @@ static bool
 read_embedded_file_entry(
        struct decompressor *decompressor,
        struct decompr_state *decompr_state,
+       const char *result_path_in_cache,
        uint32_t entry_number,
-       struct result_files *list,
+       const struct result_files *list,
        FILE *dump_stream)
 {
+       (void)result_path_in_cache;
+
        bool success = false;
        FILE *subfile = NULL;
 
@@ -180,21 +201,24 @@ read_embedded_file_entry(
        READ_BYTES(suffix, suffix_len);
        suffix[suffix_len] = '\0';
 
-       uint64_t filelen;
-       READ_UINT64(filelen);
-
-       cc_log("Reading embedded file #%u: %s (%llu)",
-              entry_number,
-              suffix,
-              (unsigned long long)filelen);
+       uint64_t file_len;
+       READ_UINT64(file_len);
 
        bool found = false;
        if (dump_stream) {
-               fprintf(dump_stream,
-                       "Entry: %s (size: %" PRIu64 " bytes)\n",
-                       suffix,
-                       filelen);
+               fprintf(
+                       dump_stream,
+                       "Embedded file #%u: %s (%" PRIu64 " bytes)\n",
+                       entry_number,
+                       suffix,
+                       file_len);
        } else {
+               cc_log(
+                       "Retrieving embedded file #%u %s (%llu bytes)",
+                       entry_number,
+                       suffix,
+                       (unsigned long long)file_len);
+
                for (uint32_t i = 0; i < list->n_files; i++) {
                        if (str_eq(suffix, list->files[i].suffix)) {
                                found = true;
@@ -207,7 +231,7 @@ read_embedded_file_entry(
                                        goto out;
                                }
                                char buf[READ_BUFFER_SIZE];
-                               size_t remain = filelen;
+                               size_t remain = file_len;
                                while (remain > 0) {
                                        size_t n = MIN(remain, sizeof(buf));
                                        READ_BYTES(buf, n);
@@ -223,10 +247,11 @@ read_embedded_file_entry(
                        }
                }
        }
+
        if (!found) {
                // Discard the file data.
                char buf[READ_BUFFER_SIZE];
-               size_t remain = filelen;
+               size_t remain = file_len;
                while (remain > 0) {
                        size_t n = MIN(remain, sizeof(buf));
                        READ_BYTES(buf, n);
@@ -244,6 +269,103 @@ out:
        return success;
 }
 
+static char *
+get_raw_file_path(const char *result_path_in_cache, uint32_t entry_number)
+{
+       return format(
+               "%.*s_%u.raw",
+               (int)strlen(result_path_in_cache) - 7, // .result
+               result_path_in_cache,
+               entry_number);
+}
+
+static bool
+copy_raw_file(const char *source, const char *dest, bool to_cache)
+{
+       if (conf->hard_link) {
+               x_try_unlink(dest);
+               cc_log("Hard linking %s to %s", source, dest);
+               int ret = link(source, dest);
+               if (ret == 0) {
+                       return true;
+               }
+               cc_log("Failed to hard link %s to %s: %s", source, dest, strerror(errno));
+       }
+
+       cc_log("Copying %s to %s", source, dest);
+       return copy_file(source, dest, to_cache);
+}
+
+static bool
+read_raw_file_entry(
+       struct decompressor *decompressor,
+       struct decompr_state *decompr_state,
+       const char *result_path_in_cache,
+       uint32_t entry_number,
+       const struct result_files *list,
+       FILE *dump_stream)
+{
+       bool success = false;
+       char *raw_path = get_raw_file_path(result_path_in_cache, entry_number);
+
+       uint8_t suffix_len;
+       READ_BYTE(suffix_len);
+
+       char suffix[256 + 1];
+       READ_BYTES(suffix, suffix_len);
+       suffix[suffix_len] = '\0';
+
+       uint64_t file_len;
+       READ_UINT64(file_len);
+
+       if (dump_stream) {
+               fprintf(
+                       dump_stream,
+                       "Raw file #%u: %s (%" PRIu64 " bytes)\n",
+                       entry_number,
+                       suffix,
+                       file_len);
+       } else {
+               cc_log(
+                       "Retrieving raw file #%u %s (%llu bytes)",
+                       entry_number,
+                       suffix,
+                       (unsigned long long)file_len);
+
+               struct stat st;
+               if (x_stat(raw_path, &st) != 0) {
+                       goto out;
+               }
+               if ((uint64_t)st.st_size != file_len) {
+                       cc_log(
+                               "Bad file size of %s (actual %llu bytes, expected %llu bytes)",
+                               raw_path,
+                               (unsigned long long)st.st_size,
+                               (unsigned long long)file_len);
+                       goto out;
+               }
+
+               for (uint32_t i = 0; i < list->n_files; i++) {
+                       if (str_eq(suffix, list->files[i].suffix)) {
+                               if (!copy_raw_file(raw_path, list->files[i].path, false)) {
+                                       goto out;
+                               }
+                               // Update modification timestamp to save the file from LRU cleanup
+                               // (and, if hard-linked, to make the object file newer than the source
+                               // file).
+                               update_mtime(raw_path);
+                               break;
+                       }
+               }
+       }
+
+       success = true;
+
+out:
+       free(raw_path);
+       return success;
+}
+
 static bool
 read_result(
        const char *path,
@@ -252,6 +374,7 @@ read_result(
        char **errmsg)
 {
        *errmsg = NULL;
+       bool cache_miss = false;
        bool success = false;
        struct decompressor *decompressor = NULL;
        struct decompr_state *decompr_state = NULL;
@@ -259,8 +382,7 @@ read_result(
 
        FILE *f = fopen(path, "rb");
        if (!f) {
-               // Cache miss.
-               *errmsg = x_strdup("No such result file");
+               cache_miss = true;
                goto out;
        }
 
@@ -289,22 +411,25 @@ read_result(
                uint8_t marker;
                READ_BYTE(marker);
 
+               read_entry_fn read_entry;
+
                switch (marker) {
                case EMBEDDED_FILE_MARKER:
-                       if (!read_embedded_file_entry(
-                                   decompressor, decompr_state, i, list, dump_stream)) {
-                               goto out;
-                       }
+                       read_entry = read_embedded_file_entry;
                        break;
 
                case RAW_FILE_MARKER:
-               // TODO: Implement.
-               // Fall through.
+                       read_entry = read_raw_file_entry;
+                       break;
 
                default:
                        *errmsg = format("Unknown entry type: %u", marker);
                        goto out;
                }
+
+               if (!read_entry(decompressor, decompr_state, path, i, list, dump_stream)) {
+                       goto out;
+               }
        }
 
        if (i != n_entries) {
@@ -335,8 +460,8 @@ out:
        if (checksum) {
                XXH64_freeState(checksum);
        }
-       if (!success && !*errmsg) {
-               *errmsg = x_strdup("Corrupt result file");
+       if (!success && !cache_miss && !*errmsg) {
+               *errmsg = x_strdup("Corrupt result");
        }
        return success;
 }
@@ -361,43 +486,134 @@ out:
                WRITE_BYTES(buf_, sizeof(buf_)); \
        } while (false)
 
+static bool
+write_embedded_file_entry(
+       struct compressor *compressor,
+       struct compr_state *compr_state,
+       const char *result_path_in_cache,
+       uint32_t entry_number,
+       const struct result_file *file)
+{
+       (void)result_path_in_cache;
+
+       bool success = false;
+
+       cc_log(
+               "Storing embedded file #%u %s (%llu bytes) from %s",
+               entry_number,
+               file->suffix,
+               (unsigned long long)file->size,
+               file->path);
+
+       WRITE_BYTE(EMBEDDED_FILE_MARKER);
+       size_t suffix_len = strlen(file->suffix);
+       WRITE_BYTE(suffix_len);
+       WRITE_BYTES(file->suffix, suffix_len);
+       WRITE_UINT64(file->size);
+
+       FILE *f = fopen(file->path, "rb");
+       if (!f) {
+               cc_log("Failed to open %s for reading", file->path);
+               goto error;
+       }
+       char buf[READ_BUFFER_SIZE];
+       size_t remain = file->size;
+       while (remain > 0) {
+               size_t n = MIN(remain, sizeof(buf));
+               if (fread(buf, 1, n, f) != n) {
+                       goto error;
+               }
+               WRITE_BYTES(buf, n);
+               remain -= n;
+       }
+       fclose(f);
+
+       success = true;
+
+error:
+       return success;
+}
+
+static bool
+write_raw_file_entry(
+       struct compressor *compressor,
+       struct compr_state *compr_state,
+       const char *result_path_in_cache,
+       uint32_t entry_number,
+       const struct result_file *file)
+{
+       bool success = false;
+
+       cc_log(
+               "Storing raw file #%u %s (%llu bytes) from %s",
+               entry_number,
+               file->suffix,
+               (unsigned long long)file->size,
+               file->path);
+
+       WRITE_BYTE(RAW_FILE_MARKER);
+       size_t suffix_len = strlen(file->suffix);
+       WRITE_BYTE(suffix_len);
+       WRITE_BYTES(file->suffix, suffix_len);
+       WRITE_UINT64(file->size);
+
+       char *raw_file = get_raw_file_path(result_path_in_cache, entry_number);
+       struct stat old_stat;
+       bool old_existed = stat(raw_file, &old_stat) == 0;
+
+       success = copy_raw_file(file->path, raw_file, true);
+
+       struct stat new_stat;
+       bool new_exists = stat(raw_file, &new_stat) == 0;
+       free(raw_file);
+
+       size_t old_size = old_existed ? file_size(&old_stat) : 0;
+       size_t new_size = new_exists ? file_size(&new_stat) : 0;
+       stats_update_size(
+               stats_file,
+               new_size - old_size,
+               (new_exists ? 1 : 0) - (old_existed ? 1 : 0));
+
+error:
+       return success;
+}
+
+static bool
+should_hard_link_suffix(const char *suffix)
+{
+       // - Don't hard link stderr outputs since they:
+       //   1. Never are large.
+       //   2. Will end up in a temporary file anyway.
+       //
+       // - Don't hard link .d files since they:
+       //   1. Never are large.
+       //   2. Compress well.
+       //   3. Cause trouble for automake if hard-linked (see ccache issue 378).
+       return !str_eq(suffix, RESULT_STDERR_NAME) && !str_eq(suffix, ".d");
+}
+
 static bool
 write_result(
        const struct result_files *list,
        struct compressor *compressor,
        struct compr_state *compr_state,
-       XXH64_state_t *checksum)
+       XXH64_state_t *checksum,
+       const char *result_path_in_cache)
 {
        WRITE_BYTE(list->n_files);
 
        for (uint32_t i = 0; i < list->n_files; i++) {
-               cc_log("Writing %s (%llu bytes) to %s",
-                      list->files[i].suffix,
-                      (unsigned long long)list->files[i].size,
-                      list->files[i].path);
-
-               WRITE_BYTE(EMBEDDED_FILE_MARKER);
-               size_t suffix_len = strlen(list->files[i].suffix);
-               WRITE_BYTE(suffix_len);
-               WRITE_BYTES(list->files[i].suffix, suffix_len);
-               WRITE_UINT64(list->files[i].size);
-
-               FILE *f = fopen(list->files[i].path, "rb");
-               if (!f) {
-                       cc_log("Failed to open %s for reading", list->files[i].path);
-                       goto error;
+               write_entry_fn write_entry;
+               if (conf->hard_link && should_hard_link_suffix(list->files[i].suffix)) {
+                       write_entry = write_raw_file_entry;
+               } else {
+                       write_entry = write_embedded_file_entry;
                }
-               char buf[READ_BUFFER_SIZE];
-               size_t remain = list->files[i].size;
-               while (remain > 0) {
-                       size_t n = MIN(remain, sizeof(buf));
-                       if (fread(buf, 1, n, f) != n) {
-                               goto error;
-                       }
-                       WRITE_BYTES(buf, n);
-                       remain -= n;
+
+               if (!write_entry(
+                           compressor, compr_state, result_path_in_cache, i, &list->files[i])) {
+                       goto error;
                }
-               fclose(f);
        }
 
        WRITE_UINT64(XXH64_digest(checksum));
@@ -415,13 +631,14 @@ bool result_get(const char *path, struct result_files *list)
 
        char *errmsg;
        bool success = read_result(path, list, NULL, &errmsg);
-       if (errmsg) {
-               cc_log("Error: %s", errmsg);
-               free(errmsg);
-       }
        if (success) {
                // Update modification timestamp to save files from LRU cleanup.
                update_mtime(path);
+       } else if (errmsg) {
+               cc_log("Error: %s", errmsg);
+               free(errmsg);
+       } else {
+               cc_log("No such result file");
        }
        return success;
 }
@@ -467,7 +684,7 @@ bool result_put(const char *path, struct result_files *list)
                goto out;
        }
 
-       bool ok = write_result(list, compressor, compr_state, checksum)
+       bool ok = write_result(list, compressor, compr_state, checksum, path)
                  && compressor->free(compr_state);
        if (!ok) {
                cc_log("Failed to write result file");
index b55b84ba2778d480d3224d8294f351f143882265..31582242921f464a800491a1e7e5f7558f473613 100644 (file)
@@ -34,6 +34,8 @@
 #include <tchar.h>
 #endif
 
+extern const struct conf *conf;
+
 // Destination for conf->log_file.
 static FILE *logfile;
 
@@ -54,8 +56,6 @@ static size_t debug_log_size;
 static bool
 init_log(void)
 {
-       extern struct conf *conf;
-
        if (debug_log_buffer || logfile || use_syslog) {
                return true;
        }
@@ -154,8 +154,6 @@ static void warn_log_fail(void) ATTR_NORETURN;
 static void
 warn_log_fail(void)
 {
-       extern struct conf *conf;
-
        // Note: Can't call fatal() since that would lead to recursion.
        fprintf(stderr, "ccache: error: Failed to write to %s: %s\n",
                conf->log_file, strerror(errno));
@@ -280,6 +278,24 @@ fatal(const char *format, ...)
        x_exit(1);
 }
 
+// Build the cache path for name: conf->cache_dir, then one directory level per
+// leading character of name, then the rest of name plus suffix. Caller frees.
+char *
+get_path_in_cache(const char *name, const char *suffix)
+{
+       char *path = x_strdup(conf->cache_dir);
+       for (unsigned i = 0; i < conf->cache_dir_levels; ++i) { // the first cache_dir_levels characters each name one level
+               char *p = format("%s/%c", path, name[i]); // NOTE(review): no length check; assumes name has at least cache_dir_levels characters
+               free(path);
+               path = p;
+       }
+
+       char *result =
+               format("%s/%s%s", path, name + conf->cache_dir_levels, suffix);
+       free(path); // the intermediate directory prefix is no longer needed
+       return result; // no directory-creating call appears in this function; only the path string is returned
+}
+
 // Copy all data from fd_in to fd_out.
 bool
 copy_fd(int fd_in, int fd_out)
@@ -335,9 +351,10 @@ get_umask(void)
 }
 #endif
 
-// Copy a file from src to dest.
+// Copy a file from src to dest. If via_tmp_file is true, the file is copied to
+// a temporary file and then renamed to dest.
 bool
-copy_file(const char *src, const char *dest)
+copy_file(const char *src, const char *dest, bool via_tmp_file)
 {
        bool result = false;
 
@@ -346,10 +363,18 @@ copy_file(const char *src, const char *dest)
                return false;
        }
 
-       int dest_fd = open(dest, O_WRONLY | O_CREAT | O_BINARY, 0666);
-       if (dest_fd == -1) {
-               close(dest_fd);
-               return false;
+       int dest_fd;
+       char *tmp_file = NULL;
+       if (via_tmp_file) {
+               tmp_file = x_strdup(dest);
+               dest_fd = create_tmp_fd(&tmp_file);
+       } else {
+               dest_fd = open(dest, O_WRONLY | O_CREAT | O_BINARY, 0666);
+               if (dest_fd == -1) {
+                       close(dest_fd);
+                       close(src_fd);
+                       return false;
+               }
        }
 
        if (copy_fd(src_fd, dest_fd)) {
@@ -358,6 +383,15 @@ copy_file(const char *src, const char *dest)
 
        close(dest_fd);
        close(src_fd);
+
+       if (via_tmp_file) {
+               x_try_unlink(dest);
+               if (x_rename(tmp_file, dest) != 0) {
+                       result = false;
+               }
+               free(tmp_file);
+       }
+
        return result;
 }
 
@@ -365,7 +399,7 @@ copy_file(const char *src, const char *dest)
 bool
 move_file(const char *src, const char *dest)
 {
-       bool ok = copy_file(src, dest);
+       bool ok = copy_file(src, dest, false);
        if (ok) {
                x_unlink(src);
        }
index 2a1a03a9bc42a16e0889ffaaab2d7913e60dbc4b..46d72299e55adce315c305eceadea820f9dd0cea 100755 (executable)
--- a/test/run
+++ b/test/run
@@ -406,6 +406,7 @@ sanitize_blacklist
 debug_prefix_map
 split_dwarf
 masquerading
+hardlink
 direct
 direct_gcc
 depend
diff --git a/test/suites/hardlink.bash b/test/suites/hardlink.bash
new file mode 100644 (file)
index 0000000..a003faf
--- /dev/null
@@ -0,0 +1,93 @@
+SUITE_hardlink_PROBE() { # prints a reason when a hard link cannot be created; prints nothing otherwise
+    touch file1
+    if ! ln file1 file2 >/dev/null 2>&1; then # attempt a real hard link in the current directory, discarding ln's own output
+        echo "file system doesn't support hardlinks" # presumably read by test/run as the reason to skip the suite -- confirm
+    fi
+}
+
+SUITE_hardlink() {
+    # -------------------------------------------------------------------------
+    TEST "CCACHE_HARDLINK" # a hit must recreate test1.o as a hard link to the output of the first compilation
+
+    generate_code 1 test1.c
+
+    $REAL_COMPILER -c -o reference_test1.o test1.c
+
+    CCACHE_HARDLINK=1 $CCACHE_COMPILE -c test1.c
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_equal_object_files reference_test1.o test1.o
+
+    mv test1.o test1.o.saved # keep the first output under another name for the -ef check below
+
+    CCACHE_HARDLINK=1 $CCACHE_COMPILE -c test1.c
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    if [ ! test1.o -ef test1.o.saved ]; then # -ef is true only when both names refer to the same device and inode
+        test_failed "Object files not hard linked"
+    fi
+
+    # -------------------------------------------------------------------------
+    TEST "Corrupted file size is detected" # NOTE(review): no step below changes a file size, so only a plain hit is checked
+
+    generate_code 1 test1.c
+
+    CCACHE_HARDLINK=1 $CCACHE_COMPILE -c test1.c
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+
+    mv test1.o test1.o.saved
+
+    CCACHE_HARDLINK=1 $CCACHE_COMPILE -c test1.c
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+
+    # -------------------------------------------------------------------------
+    TEST "Overwrite assembler" # test1.s is regenerated in place between compilations
+
+    generate_code 1 test1.c
+    $REAL_COMPILER -S -o test1.s test1.c
+
+    $REAL_COMPILER -c -o reference_test1.o test1.s
+
+    CCACHE_HARDLINK=1 $CCACHE_COMPILE -c test1.s
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+
+    generate_code 2 test1.c # different code, written over the same test1.c and then test1.s
+    $REAL_COMPILER -S -o test1.s test1.c
+
+    CCACHE_HARDLINK=1 $CCACHE_COMPILE -c test1.s
+    expect_stat 'cache hit (preprocessed)' 0
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 4
+
+    generate_code 1 test1.c # back to the first code: a hit is expected and the object must equal the reference
+    $REAL_COMPILER -S -o test1.s test1.c
+
+    CCACHE_HARDLINK=1 $CCACHE_COMPILE -c test1.s
+    expect_stat 'cache hit (preprocessed)' 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 4
+    expect_equal_object_files reference_test1.o test1.o
+
+    # -------------------------------------------------------------------------
+    TEST "Automake depend move" # the -MF output goes to test1.d.tmp and is renamed to test1.d after each run
+
+    unset CCACHE_NODIRECT # presumably set by the harness by default; direct-mode hits are checked below -- confirm
+
+    generate_code 1 test1.c
+
+    CCACHE_HARDLINK=1 CCACHE_DEPEND=1 $CCACHE_COMPILE -c -MMD -MF test1.d.tmp test1.c
+    expect_stat 'cache hit (direct)' 0
+    mv test1.d.tmp test1.d || test_failed "first mv failed"
+
+    CCACHE_HARDLINK=1 CCACHE_DEPEND=1 $CCACHE_COMPILE -c -MMD -MF test1.d.tmp test1.c
+    expect_stat 'cache hit (direct)' 1
+    mv test1.d.tmp test1.d || test_failed "second mv failed" # the rename must still succeed when test1.d.tmp came from a cache hit
+}
index a14800bb321a2ff83d456c0e395d424a44b38b8c..34e56c16c54d286e0be2adb6a1a09e21c0ca4138 100644 (file)
@@ -18,7 +18,7 @@
 #include "framework.h"
 #include "util.h"
 
-#define N_CONFIG_ITEMS 33
+#define N_CONFIG_ITEMS 34
 static struct {
        char *descr;
        char *origin;
@@ -63,6 +63,7 @@ TEST(conf_create)
        CHECK(conf->direct_mode);
        CHECK(!conf->disable);
        CHECK_STR_EQ("", conf->extra_files_to_hash);
+       CHECK(!conf->hard_link);
        CHECK(conf->hash_dir);
        CHECK_STR_EQ("", conf->ignore_headers_in_manifest);
        CHECK(!conf->keep_comments_cpp);
@@ -115,6 +116,7 @@ TEST(conf_read_valid_config)
                "direct_mode = false\n"
                "disable = true\n"
                "extra_files_to_hash = a:b c:$USER\n"
+               "hard_link = true\n"
                "hash_dir = false\n"
                "ignore_headers_in_manifest = a:b/c\n"
                "keep_comments_cpp = true\n"
@@ -154,6 +156,7 @@ TEST(conf_read_valid_config)
        CHECK(!conf->direct_mode);
        CHECK(conf->disable);
        CHECK_STR_EQ_FREE1(format("a:b c:%s", user), conf->extra_files_to_hash);
+       CHECK(conf->hard_link);
        CHECK(!conf->hash_dir);
        CHECK_STR_EQ("a:b/c", conf->ignore_headers_in_manifest);
        CHECK(conf->keep_comments_cpp);
@@ -463,6 +466,7 @@ TEST(conf_print_items)
                false,
                true,
                "efth",
+               true,
                .hash_dir = false,
                "ihim",
                true,
@@ -514,6 +518,7 @@ TEST(conf_print_items)
        CHECK_STR_EQ("direct_mode = false", received_conf_items[n++].descr);
        CHECK_STR_EQ("disable = true", received_conf_items[n++].descr);
        CHECK_STR_EQ("extra_files_to_hash = efth", received_conf_items[n++].descr);
+       CHECK_STR_EQ("hard_link = true", received_conf_items[n++].descr);
        CHECK_STR_EQ("hash_dir = false", received_conf_items[n++].descr);
        CHECK_STR_EQ("ignore_headers_in_manifest = ihim",
                     received_conf_items[n++].descr);