git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Add --copy-file-range option to pg_upgrade.
author Thomas Munro <tmunro@postgresql.org>
Tue, 5 Mar 2024 22:39:50 +0000 (11:39 +1300)
committer Thomas Munro <tmunro@postgresql.org>
Tue, 5 Mar 2024 23:01:01 +0000 (12:01 +1300)
The copy_file_range() system call is available on at least Linux and
FreeBSD, and asks the kernel to use efficient ways to copy ranges of a
file.  Options available to the kernel include sharing block ranges
(similar to --clone mode), and pushing down block copies to the storage
layer.

For automated testing, see PG_TEST_PG_UPGRADE_MODE.  (Perhaps in a later
commit we could consider setting this mode for one of the CI targets.)

Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/CA%2BhUKGKe7Hb0-UNih8VD5UNZy5-ojxFb3Pr3xSBBL8qj2M2%3DdQ%40mail.gmail.com

configure
configure.ac
doc/src/sgml/ref/pgupgrade.sgml
meson.build
src/bin/pg_upgrade/TESTING
src/bin/pg_upgrade/check.c
src/bin/pg_upgrade/file.c
src/bin/pg_upgrade/option.c
src/bin/pg_upgrade/pg_upgrade.h
src/bin/pg_upgrade/relfilenumber.c
src/include/pg_config.h.in

index 46859a4244ee85fbad791372641ebe441bd03672..36feeafbb23a6ead538c446803ade19dac431493 100755 (executable)
--- a/configure
+++ b/configure
@@ -15259,7 +15259,7 @@ fi
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
+for ac_func in backtrace_symbols copyfile copy_file_range getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
index 88b75a7696cd1451ca9a7eab66c4298c12f31e74..57f734879e19fe8756c3fc7e4fd463dbf5451607 100644 (file)
@@ -1749,6 +1749,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 AC_CHECK_FUNCS(m4_normalize([
        backtrace_symbols
        copyfile
+       copy_file_range
        getifaddrs
        getpeerucred
        inet_pton
index 68ec68f47b681c1af79b209b186600713f813558..58c6c2df8b86ccc486d19e078d31ead5edbbe015 100644 (file)
@@ -263,6 +263,19 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--copy-file-range</option></term>
+      <listitem>
+       <para>
+        Use the <function>copy_file_range</function> system call for efficient
+        copying.  On some file systems this gives results similar to
+        <option>--clone</option>, sharing physical disk blocks, while on others
+        it may still copy blocks, but do so via an optimized path.  At present,
+        it is supported on Linux and FreeBSD.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-?</option></term>
       <term><option>--help</option></term>
index a198eca25db2e13a60ce1d5c711885dc2eb47a46..85788f9dd8f2a668ce11938a33a2456d15b1dbe6 100644 (file)
@@ -2420,6 +2420,7 @@ func_checks = [
   ['backtrace_symbols', {'dependencies': [execinfo_dep]}],
   ['clock_gettime', {'dependencies': [rt_dep], 'define': false}],
   ['copyfile'],
+  ['copy_file_range'],
   # gcc/clang's sanitizer helper library provides dlopen but not dlsym, thus
   # when enabling asan the dlopen check doesn't notice that -ldl is actually
   # required. Just checking for dlsym() ought to suffice.
index 81a4324a76d580ce7e45ef5da3e3e83f5eed81c3..00842ac6ec3ab092becdfe2275857567c39ce9fb 100644 (file)
@@ -20,8 +20,8 @@ export oldinstall=...otherversion/    (old version's install base path)
 See DETAILS below for more information about creation of the dump.
 
 You can also test the different transfer modes (--copy, --link,
---clone) by setting the environment variable PG_TEST_PG_UPGRADE_MODE
-to the respective command-line option, like
+--clone, --copy-file-range) by setting the environment variable
+PG_TEST_PG_UPGRADE_MODE to the respective command-line option, like
 
        make check PG_TEST_PG_UPGRADE_MODE=--link
 
index e36a7328bf0f84e9f14b90945c0a40646f73c1e0..5ab8fe800913400529f6b73de938dc4b5a3dc451 100644 (file)
@@ -235,6 +235,9 @@ check_new_cluster(void)
                        break;
                case TRANSFER_MODE_COPY:
                        break;
+               case TRANSFER_MODE_COPY_FILE_RANGE:
+                       check_copy_file_range();
+                       break;
                case TRANSFER_MODE_LINK:
                        check_hard_link();
                        break;
index 4850a682cb5d5f57ec18ebc315c08551239234cc..beba376f2eecf56b34f9160374ab5176458feb5c 100644 (file)
@@ -10,6 +10,7 @@
 #include "postgres_fe.h"
 
 #include <sys/stat.h>
+#include <limits.h>
 #include <fcntl.h>
 #ifdef HAVE_COPYFILE_H
 #include <copyfile.h>
@@ -140,6 +141,45 @@ copyFile(const char *src, const char *dst,
 }
 
 
+/*
+ * copyFileByRange()
+ *
+ * Copies a relation file from src to dst.
+ * schemaName/relName are relation's SQL name (used for error messages only).
+ *
+ * src is opened read-only and dst is created with O_CREAT | O_EXCL, so an
+ * already-existing dst is reported as an error instead of being overwritten.
+ * The data is transferred by calling copy_file_range() with NULL offsets and
+ * a length of SSIZE_MAX, repeating until the call returns 0.  A failed open
+ * or a negative return from copy_file_range() is reported through pg_fatal()
+ * together with strerror(errno).
+ *
+ * The return values of the two close() calls are not checked.
+ *
+ * When HAVE_COPY_FILE_RANGE is not defined the body compiles to nothing: no
+ * file is opened or created and no error is raised here.
+ * NOTE(review): this presumably relies on check_copy_file_range() having
+ * already rejected such builds -- confirm against the callers.
+ */
+void
+copyFileByRange(const char *src, const char *dst,
+                               const char *schemaName, const char *relName)
+{
+#ifdef HAVE_COPY_FILE_RANGE
+       int                     src_fd;
+       int                     dest_fd;
+       ssize_t         nbytes;
+
+       if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
+               pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s",
+                                schemaName, relName, src, strerror(errno));
+
+       if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                                               pg_file_create_mode)) < 0)
+               pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s",
+                                schemaName, relName, dst, strerror(errno));
+
+       do
+       {
+               nbytes = copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0);
+               if (nbytes < 0)
+                       pg_fatal("error while copying relation \"%s.%s\": could not copy file range from \"%s\" to \"%s\": %s",
+                                        schemaName, relName, src, dst, strerror(errno));
+       }
+       while (nbytes > 0);             /* a return of 0 ends the copy */
+
+       close(src_fd);
+       close(dest_fd);
+#endif
+}
+
+
 /*
  * linkFile()
  *
@@ -358,6 +398,44 @@ check_file_clone(void)
        unlink(new_link_file);
 }
 
+void
+check_copy_file_range(void)    /* probe: can copy_file_range() copy a file from
+                                * the old data directory into the new one? */
+{
+       char            existing_file[MAXPGPATH];       /* source: old cluster's PG_VERSION */
+       char            new_link_file[MAXPGPATH];       /* scratch target in new cluster; no link is made despite the name */
+
+       snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
+       snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.copy_file_range_test", new_cluster.pgdata);
+       unlink(new_link_file);          /* the O_EXCL create below needs it gone; might fail */
+
+#if defined(HAVE_COPY_FILE_RANGE)
+       {
+               int                     src_fd;
+               int                     dest_fd;
+
+               if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
+                       pg_fatal("could not open file \"%s\": %s",
+                                        existing_file, strerror(errno));
+
+               if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                                                       pg_file_create_mode)) < 0)
+                       pg_fatal("could not create file \"%s\": %s",
+                                        new_link_file, strerror(errno));
+
+               if (copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0) < 0)
+                       pg_fatal("could not copy file range between old and new data directories: %s",
+                                        strerror(errno));      /* single call; only failure is checked, the byte count is ignored */
+
+               close(src_fd);
+               close(dest_fd);
+       }
+#else
+       pg_fatal("copy_file_range not supported on this platform");     /* built without HAVE_COPY_FILE_RANGE */
+#endif
+
+       unlink(new_link_file);          /* remove the scratch file again */
+}
+
 void
 check_hard_link(void)
 {
index 2917ec2329e69d4fa47ce9e3bfbd7d9a88993d34..8949c58de80f99c532978813830deb951cf7944b 100644 (file)
@@ -58,7 +58,8 @@ parseCommandLine(int argc, char *argv[])
                {"verbose", no_argument, NULL, 'v'},
                {"clone", no_argument, NULL, 1},
                {"copy", no_argument, NULL, 2},
-               {"sync-method", required_argument, NULL, 3},
+               {"copy-file-range", no_argument, NULL, 3},
+               {"sync-method", required_argument, NULL, 4},
 
                {NULL, 0, NULL, 0}
        };
@@ -203,6 +204,9 @@ parseCommandLine(int argc, char *argv[])
                                break;
 
                        case 3:
+                               user_opts.transfer_mode = TRANSFER_MODE_COPY_FILE_RANGE;
+                               break;
+                       case 4:
                                if (!parse_sync_method(optarg, &unused))
                                        exit(1);
                                user_opts.sync_method = pg_strdup(optarg);
@@ -301,6 +305,7 @@ usage(void)
        printf(_("  -V, --version                 display version information, then exit\n"));
        printf(_("  --clone                       clone instead of copying files to new cluster\n"));
        printf(_("  --copy                        copy files to new cluster (default)\n"));
+       printf(_("  --copy-file-range             copy files to new cluster with copy_file_range\n"));
        printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
        printf(_("  -?, --help                    show this help, then exit\n"));
        printf(_("\n"
index d9a848cbfde6cee0fbe615300237779fb6a581bb..857d715049ee01d48df5bc4b65e044620b86aeb2 100644 (file)
@@ -256,6 +256,7 @@ typedef enum
 {
        TRANSFER_MODE_CLONE,
        TRANSFER_MODE_COPY,
+       TRANSFER_MODE_COPY_FILE_RANGE,
        TRANSFER_MODE_LINK,
 } transferMode;
 
@@ -402,11 +403,14 @@ void              cloneFile(const char *src, const char *dst,
                                          const char *schemaName, const char *relName);
 void           copyFile(const char *src, const char *dst,
                                         const char *schemaName, const char *relName);
+void           copyFileByRange(const char *src, const char *dst,
+                                                       const char *schemaName, const char *relName);
 void           linkFile(const char *src, const char *dst,
                                         const char *schemaName, const char *relName);
 void           rewriteVisibilityMap(const char *fromfile, const char *tofile,
                                                                 const char *schemaName, const char *relName);
 void           check_file_clone(void);
+void           check_copy_file_range(void);
 void           check_hard_link(void);
 
 /* fopen_priv() is no longer different from fopen() */
index 7ca221ee19050d26f260520aa7b5b7715c402c2d..a1fc5fec78d8b63f0d489765b57fd1f778e191b6 100644 (file)
@@ -37,6 +37,9 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
                case TRANSFER_MODE_COPY:
                        prep_status_progress("Copying user relation files");
                        break;
+               case TRANSFER_MODE_COPY_FILE_RANGE:
+                       prep_status_progress("Copying user relation files with copy_file_range");
+                       break;
                case TRANSFER_MODE_LINK:
                        prep_status_progress("Linking user relation files");
                        break;
@@ -250,6 +253,11 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
                                                   old_file, new_file);
                                        copyFile(old_file, new_file, map->nspname, map->relname);
                                        break;
+                               case TRANSFER_MODE_COPY_FILE_RANGE:
+                                       pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\" with copy_file_range",
+                                                  old_file, new_file);
+                                       copyFileByRange(old_file, new_file, map->nspname, map->relname);
+                                       break;
                                case TRANSFER_MODE_LINK:
                                        pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"",
                                                   old_file, new_file);
index 07e73567dc7086c397d1606253ffb096c80ce9b7..591e1ca3df66486e6968c362c22567459d428709 100644 (file)
@@ -78,6 +78,9 @@
 /* Define to 1 if you have the <copyfile.h> header file. */
 #undef HAVE_COPYFILE_H
 
+/* Define to 1 if you have the `copy_file_range' function. */
+#undef HAVE_COPY_FILE_RANGE
+
 /* Define to 1 if you have the <crtdefs.h> header file. */
 #undef HAVE_CRTDEFS_H