From: Joel Rosdahl Date: Mon, 15 Jul 2019 12:10:28 +0000 (+0200) Subject: Implement support for file cloning on Linux (Btrfs/XFS) X-Git-Tag: v4.0~901 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3cc4bcd08e15c7851669188cbbd6a5a3f7116cc4;p=thirdparty%2Fccache.git Implement support for file cloning on Linux (Btrfs/XFS) - Added a new file_clone (CCACHE_FILECLONE) configuration setting. If set, ccache uses the FICLONE ioctl if available to clone files to/from the cache. If file cloning is not supported by the file system, ccache will silently fall back to copying (or hard linking if hard_link is enabled). - Compression will be disabled if file_clone is enabled, just like for hard_link. - file_clone has priority over hard_link. - Tested on Btrfs and XFS on Linux 5.0.0. Closes #213. --- diff --git a/configure.ac b/configure.ac index c67ec85a6..34a50ff9d 100644 --- a/configure.ac +++ b/configure.ac @@ -83,6 +83,8 @@ AC_CHECK_TYPES(long long) AC_CHECK_HEADERS(ctype.h pwd.h stdlib.h string.h strings.h sys/time.h sys/mman.h) AC_CHECK_HEADERS(syslog.h) AC_CHECK_HEADERS(termios.h) +AC_CHECK_HEADERS(sys/ioctl.h) +AC_CHECK_HEADERS(linux/fs.h) AC_CHECK_FUNCS(gethostname) AC_CHECK_FUNCS(getopt_long) diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 1f8a9f544..375a05bd0 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -45,6 +45,8 @@ Features * Low overhead. * Compresses data in the cache to save disk space. * Checksums data in the cache to detect corruption. +* Optionally uses file cloning (AKA “copy on write” or reflinks) to avoid + copies (not supported by all file systems). * Optionally uses hard links avoid copies (there are caveats, though). @@ -365,8 +367,8 @@ that there should be little reason to turn off compression to gain performance. One exception is if the cache is located on a compressed file system, in which case the compression performed by ccache of course is redundant. 
+ -Compression will be disabled if hard linking (the *hard_link* setting) is -enabled. +Compression will be disabled if file cloning (the *file_clone* setting) or hard +linking (the *hard_link* setting) is enabled. *compression_level* (*CCACHE_COMPRESSLEVEL*):: @@ -431,11 +433,24 @@ Semantics of *compression_level*: the hash sum that identifies the build. The list separator is semicolon on Windows systems and colon on other systems. +*file_clone* (*CCACHE_FILECLONE* or *CCACHE_NOFILECLONE*, see <<_boolean_values,Boolean values>> above):: + + If true, ccache will attempt to use file cloning (also known as “copy on + write”, “CoW” or “reflinks”) to store and fetch cached compiler results. + *file_clone* has priority over *hard_link*. The default is false. ++ +Files stored by cloning cannot be compressed, so the cache size will likely be +significantly larger if this option is enabled. However, performance may be +improved depending on the use case. ++ +Unlike the *hard_link* setting, *file_clone* is completely safe to use, but not +all file systems support the feature. For such file systems, ccache will fall +back to use plain copying (or hard links if *hard_link* is enabled). + *hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see <<_boolean_values,Boolean values>> above):: - If true, ccache will attempt to use hard links to store compiler output - files in the cache, and similarly use hard links when retrieving files from - the cache. The default is false. + If true, ccache will attempt to use hard links to store and fetch cached + compiler results. The default is false. + An exception is dependency files (`.d`) which are never stored as hard links. 
+ diff --git a/misc/performance b/misc/performance index 5ab04e226..ca7b61f15 100755 --- a/misc/performance +++ b/misc/performance @@ -97,12 +97,14 @@ def test(tmp_dir, options, compiler_args, source_file): environment = {"CCACHE_DIR": ccache_dir, "PATH": environ["PATH"]} environment["CCACHE_COMPILERCHECK"] = options.compilercheck - if options.no_compression: - environment["CCACHE_NOCOMPRESS"] = "1" if options.compression_level: environment["CCACHE_COMPRESSLEVEL"] = str(options.compression_level) + if options.file_clone: + environment["CCACHE_FILECLONE"] = "1" if options.hardlink: environment["CCACHE_HARDLINK"] = "1" + if options.no_compression: + environment["CCACHE_NOCOMPRESS"] = "1" if options.nostats: environment["CCACHE_NOSTATS"] = "1" @@ -268,6 +270,7 @@ def main(argv): " files (default: %s)" % DEFAULT_DIRECTORY ), ) + op.add_option("--file-clone", help="use file cloning", action="store_true") op.add_option("--hardlink", help="use hard links", action="store_true") op.add_option( "--hit-factor", @@ -325,7 +328,8 @@ def main(argv): print("Compilercheck:", options.compilercheck) print("Compression:", on_off(not options.no_compression)) print("Compression level:", options.compression_level or "default") - print("Hardlink:", on_off(options.hardlink)) + print("File cloning:", on_off(options.file_clone)) + print("Hard linking:", on_off(options.hardlink)) print("Nostats:", on_off(options.nostats)) tmp_dir = "%s/perfdir.%d" % (abspath(options.directory), getpid()) diff --git a/src/ccache.h b/src/ccache.h index 7ae214e7d..259b98dfd 100644 --- a/src/ccache.h +++ b/src/ccache.h @@ -151,6 +151,7 @@ void warn(const char *format, ...) 
ATTR_FORMAT(printf, 1, 2); char *get_path_in_cache(const char *name, const char *suffix); bool copy_fd(int fd_in, int fd_out); +bool clone_file(const char *src, const char *dest, bool via_tmp_file); bool copy_file(const char *src, const char *dest, bool via_tmp_file); bool move_file(const char *src, const char *dest); int create_dir(const char *dir); diff --git a/src/conf.c b/src/conf.c index a7cbc1c93..6552b97b5 100644 --- a/src/conf.c +++ b/src/conf.c @@ -127,6 +127,7 @@ struct conf * conf_create(void) { struct conf *conf = x_malloc(sizeof(*conf)); + conf->base_dir = x_strdup(""); conf->cache_dir = format("%s/.ccache", get_home_directory()); conf->cache_dir_levels = 2; @@ -140,6 +141,7 @@ conf_create(void) conf->direct_mode = true; conf->disable = false; conf->extra_files_to_hash = x_strdup(""); + conf->file_clone = false; conf->hard_link = false; conf->hash_dir = true; conf->ignore_headers_in_manifest = x_strdup(""); @@ -161,6 +163,7 @@ conf_create(void) conf->temporary_dir = x_strdup(""); conf->umask = UINT_MAX; // Default: don't set umask. 
conf->unify = false; + conf->item_origins = x_malloc(confitems_count() * sizeof(char *)); for (size_t i = 0; i < confitems_count(); ++i) { conf->item_origins[i] = "default"; @@ -409,6 +412,7 @@ conf_print_items(struct conf *conf, ok &= print_item(conf, "direct_mode", printer, context); ok &= print_item(conf, "disable", printer, context); ok &= print_item(conf, "extra_files_to_hash", printer, context); + ok &= print_item(conf, "file_clone", printer, context); ok &= print_item(conf, "hard_link", printer, context); ok &= print_item(conf, "hash_dir", printer, context); ok &= print_item(conf, "ignore_headers_in_manifest", printer, context); diff --git a/src/conf.h b/src/conf.h index 07d888572..b64544d79 100644 --- a/src/conf.h +++ b/src/conf.h @@ -17,6 +17,7 @@ struct conf { bool direct_mode; bool disable; char *extra_files_to_hash; + bool file_clone; bool hard_link; bool hash_dir; char *ignore_headers_in_manifest; diff --git a/src/confitems.gperf b/src/confitems.gperf index b0906ddcb..972e0bdd3 100644 --- a/src/confitems.gperf +++ b/src/confitems.gperf @@ -33,6 +33,7 @@ depend_mode, ITEM(depend_mode, bool) direct_mode, ITEM(direct_mode, bool) disable, ITEM(disable, bool) extra_files_to_hash, ITEM(extra_files_to_hash, env_string) +file_clone, ITEM(file_clone, bool) hard_link, ITEM(hard_link, bool) hash_dir, ITEM(hash_dir, bool) ignore_headers_in_manifest, ITEM(ignore_headers_in_manifest, env_string) diff --git a/src/envtoconfitems.gperf b/src/envtoconfitems.gperf index 6f12cb378..042aadc21 100644 --- a/src/envtoconfitems.gperf +++ b/src/envtoconfitems.gperf @@ -26,6 +26,7 @@ DIRECT, "direct_mode" DISABLE, "disable" EXTENSION, "cpp_extension" EXTRAFILES, "extra_files_to_hash" +FILECLONE, "file_clone" HARDLINK, "hard_link" HASHDIR, "hash_dir" IGNOREHEADERS, "ignore_headers_in_manifest" diff --git a/src/result.c b/src/result.c index e72685d00..ab87df38a 100644 --- a/src/result.c +++ b/src/result.c @@ -282,6 +282,13 @@ get_raw_file_path(const char *result_path_in_cache, 
uint32_t entry_number)
 static bool
 copy_raw_file(const char *source, const char *dest, bool to_cache)
 {
+	if (conf->file_clone) {
+		cc_log("Cloning %s to %s", source, dest);
+		if (clone_file(source, dest, to_cache)) {
+			return true;
+		}
+		cc_log("Failed to clone: %s", strerror(errno));
+	}
 	if (conf->hard_link) {
 		x_try_unlink(dest);
 		cc_log("Hard linking %s to %s", source, dest);
@@ -289,7 +296,7 @@ copy_raw_file(const char *source, const char *dest, bool to_cache)
 		if (ret == 0) {
 			return true;
 		}
-		cc_log("Failed to hard link %s to %s: %s", source, dest, strerror(errno));
+		cc_log("Failed to hard link: %s", strerror(errno));
 	}
 
 	cc_log("Copying %s to %s", source, dest);
@@ -603,13 +610,11 @@ write_result(
 	WRITE_BYTE(list->n_files);
 
 	for (uint32_t i = 0; i < list->n_files; i++) {
-		write_entry_fn write_entry;
-		if (conf->hard_link && should_hard_link_suffix(list->files[i].suffix)) {
-			write_entry = write_raw_file_entry;
-		} else {
-			write_entry = write_embedded_file_entry;
-		}
-
+		bool store_raw =
+			conf->file_clone
+			|| (conf->hard_link && should_hard_link_suffix(list->files[i].suffix));
+		write_entry_fn write_entry =
+			store_raw ? write_raw_file_entry : write_embedded_file_entry;
 		if (!write_entry(
 			compressor, compr_state, result_path_in_cache, i, &list->files[i])) {
 			goto error;
diff --git a/src/util.c b/src/util.c
index 315822429..6ed97f8e5 100644
--- a/src/util.c
+++ b/src/util.c
@@ -27,6 +27,19 @@
 #include
 #endif
 
+#ifdef __linux__
+#  ifdef HAVE_SYS_IOCTL_H
+#    include <sys/ioctl.h>
+#  endif
+#  ifdef HAVE_LINUX_FS_H
+#    include <linux/fs.h>
+#    ifndef FICLONE
+#      define FICLONE _IOW(0x94, 9, int)
+#    endif
+#    define FILE_CLONING_SUPPORTED 1
+#  endif
+#endif
+
 #ifdef _WIN32
 #include
 #include
@@ -351,6 +364,86 @@ get_umask(void)
 }
 #endif
 
+// Clone a file from src to dest. If via_tmp_file is true, the file is cloned
+// to a temporary file and then renamed to dest.
+//
+// Returns true on success. On failure, returns false with errno describing the
+// failed call (EOPNOTSUPP if file cloning support was not compiled in). With
+// via_tmp_file, a failed clone removes the temporary file and leaves dest
+// untouched so that the caller can fall back to another copy method.
+bool
+clone_file(const char *src, const char *dest, bool via_tmp_file)
+{
+	bool result = false;
+
+#ifdef FILE_CLONING_SUPPORTED
+
+#if defined(__linux__)
+	int src_fd = open(src, O_RDONLY);
+	if (src_fd == -1) {
+		return false;
+	}
+
+	int dest_fd;
+	char *tmp_file = NULL;
+	if (via_tmp_file) {
+		tmp_file = x_strdup(dest);
+		dest_fd = create_tmp_fd(&tmp_file);
+	} else {
+		// Start from an empty file: a stale, longer dest must not keep its old
+		// tail after the clone.
+		dest_fd = open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
+	}
+	if (dest_fd == -1) {
+		// Nothing to close for dest. Preserve the errno of the failed open since
+		// close() may overwrite it and the caller logs strerror(errno).
+		int open_errno = errno;
+		close(src_fd);
+		free(tmp_file);
+		errno = open_errno;
+		return false;
+	}
+
+	int saved_errno = 0;
+	if (ioctl(dest_fd, FICLONE, src_fd) == 0) {
+		result = true;
+	} else {
+		saved_errno = errno;
+	}
+
+	close(dest_fd);
+	close(src_fd);
+
+	if (via_tmp_file) {
+		if (result) {
+			x_try_unlink(dest);
+			if (x_rename(tmp_file, dest) != 0) {
+				saved_errno = errno;
+				result = false;
+			}
+		}
+		if (!result) {
+			// Never install the empty temporary file as dest; just remove it.
+			x_try_unlink(tmp_file);
+		}
+		free(tmp_file);
+	}
+
+	errno = saved_errno;
+#endif
+
+#else // FILE_CLONING_SUPPORTED
+
+	(void)src;
+	(void)dest;
+	(void)via_tmp_file;
+	errno = EOPNOTSUPP;
+
+#endif // FILE_CLONING_SUPPORTED
+
+	return result;
+}
+
 // Copy a file from src to dest. If via_tmp_file is true, the file is copied to
 // a temporary file and then renamed to dest.
 bool
diff --git a/test/run b/test/run
index 46d72299e..3a1067e74 100755
--- a/test/run
+++ b/test/run
@@ -407,6 +407,7 @@ debug_prefix_map
 split_dwarf
 masquerading
 hardlink
+fileclone
 direct
 direct_gcc
 depend
diff --git a/test/suites/fileclone.bash b/test/suites/fileclone.bash
new file mode 100644
index 000000000..c101c209a
--- /dev/null
+++ b/test/suites/fileclone.bash
@@ -0,0 +1,55 @@
+SUITE_fileclone_PROBE() {
+    touch file1
+    if !
cp --reflink=always file1 file2 >/dev/null 2>&1; then + echo "file system doesn't support file cloning" + fi +} + +SUITE_fileclone() { + # ------------------------------------------------------------------------- + TEST "Base case" + + generate_code 1 test.c + + $REAL_COMPILER -c -o reference_test.o test.c + + CCACHE_FILECLONE=1 CCACHE_DEBUG=1 $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 2 + expect_equal_object_files reference_test.o test.o + + CCACHE_FILECLONE=1 CCACHE_DEBUG=1 $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (preprocessed)' 1 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 2 + expect_equal_object_files reference_test.o test.o + if ! grep -q 'Cloning.*to test.o' test.o.ccache-log; then + test_failed "Did not try to clone file" + fi + if grep -q 'Failed to clone' test.o.ccache-log; then + test_failed "Failed to clone" + fi + + # ------------------------------------------------------------------------- + TEST "Cloning not used for stored non-raw result" + + generate_code 1 test.c + + $REAL_COMPILER -c -o reference_test.o test.c + + $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 1 + expect_equal_object_files reference_test.o test.o + + CCACHE_FILECLONE=1 CCACHE_DEBUG=1 $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (preprocessed)' 1 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 1 + expect_equal_object_files reference_test.o test.o + if grep -q 'Cloning' test.o.ccache-log; then + test_failed "Tried to clone" + fi +} diff --git a/unittest/test_conf.c b/unittest/test_conf.c index 34e56c16c..8eb6bae93 100644 --- a/unittest/test_conf.c +++ b/unittest/test_conf.c @@ -18,7 +18,7 @@ #include "framework.h" #include "util.h" -#define N_CONFIG_ITEMS 34 +#define N_CONFIG_ITEMS 35 static struct { char *descr; char *origin; @@ -63,6 +63,7 @@ TEST(conf_create) 
CHECK(conf->direct_mode); CHECK(!conf->disable); CHECK_STR_EQ("", conf->extra_files_to_hash); + CHECK(!conf->file_clone); CHECK(!conf->hard_link); CHECK(conf->hash_dir); CHECK_STR_EQ("", conf->ignore_headers_in_manifest); @@ -116,6 +117,7 @@ TEST(conf_read_valid_config) "direct_mode = false\n" "disable = true\n" "extra_files_to_hash = a:b c:$USER\n" + "file_clone = true\n" "hard_link = true\n" "hash_dir = false\n" "ignore_headers_in_manifest = a:b/c\n" @@ -156,6 +158,7 @@ TEST(conf_read_valid_config) CHECK(!conf->direct_mode); CHECK(conf->disable); CHECK_STR_EQ_FREE1(format("a:b c:%s", user), conf->extra_files_to_hash); + CHECK(conf->file_clone); CHECK(conf->hard_link); CHECK(!conf->hash_dir); CHECK_STR_EQ("a:b/c", conf->ignore_headers_in_manifest); @@ -466,6 +469,7 @@ TEST(conf_print_items) false, true, "efth", + .file_clone = true, true, .hash_dir = false, "ihim", @@ -518,6 +522,7 @@ TEST(conf_print_items) CHECK_STR_EQ("direct_mode = false", received_conf_items[n++].descr); CHECK_STR_EQ("disable = true", received_conf_items[n++].descr); CHECK_STR_EQ("extra_files_to_hash = efth", received_conf_items[n++].descr); + CHECK_STR_EQ("file_clone = true", received_conf_items[n++].descr); CHECK_STR_EQ("hard_link = true", received_conf_items[n++].descr); CHECK_STR_EQ("hash_dir = false", received_conf_items[n++].descr); CHECK_STR_EQ("ignore_headers_in_manifest = ihim",