git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Add file_extend_method=posix_fallocate,write_zeros.
author: Thomas Munro <tmunro@postgresql.org>
Sat, 31 May 2025 10:50:22 +0000 (22:50 +1200)
committer: Thomas Munro <tmunro@postgresql.org>
Fri, 6 Feb 2026 04:41:42 +0000 (17:41 +1300)
Provide a way to disable the use of posix_fallocate() for relation
files.  It was introduced by commit 4d330a61bb1.  The new setting
file_extend_method=write_zeros can be used as a workaround for problems
reported from the field:

 * BTRFS compression is disabled by the use of posix_fallocate()
 * XFS could produce spurious ENOSPC errors in some Linux kernel
   versions, though that problem is reported to have been fixed

The default is file_extend_method=posix_fallocate if available, as
before.  The write_zeros option is similar to PostgreSQL < 16, except
that now it's multi-block.

Backpatch-through: 16
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reported-by: Dimitrios Apostolou <jimis@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net

doc/src/sgml/config.sgml
src/backend/storage/file/fd.c
src/backend/storage/smgr/md.c
src/backend/utils/misc/guc_tables.c
src/backend/utils/misc/postgresql.conf.sample
src/include/storage/fd.h

index 17273abbadcfa46daa0ba58878ee7696badb7a99..880e83035dcfe0b3b26bb5f3c6f767712f490147 100644 (file)
@@ -2295,6 +2295,43 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-file-extend-method" xreflabel="file_extend_method">
+      <term><varname>file_extend_method</varname> (<type>enum</type>)
+      <indexterm>
+       <primary><varname>file_extend_method</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the method used to extend data files during bulk operations
+        such as <command>COPY</command>.  The first available option is used as
+        the default, depending on the operating system:
+        <itemizedlist>
+         <listitem>
+          <para>
+           <literal>posix_fallocate</literal> (Unix) uses the standard POSIX
+            interface for allocating disk space, but is missing on some systems.
+            If it is present but the underlying file system doesn't support it,
+            this option silently falls back to <literal>write_zeros</literal>.
+            Current versions of BTRFS are known to disable compression when
+            this option is used.
+            This is the default on systems that have the function.
+           </para>
+         </listitem>
+         <listitem>
+          <para>
+           <literal>write_zeros</literal> extends files by writing out blocks
+            of zero bytes.  This is the default on systems that don't have the
+            function <function>posix_fallocate</function>.
+          </para>
+         </listitem>
+        </itemizedlist>
+        The <literal>write_zeros</literal> method is always used when data
+        files are extended by 8 blocks or fewer.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
       <term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
       <indexterm>
index 66008275becf5b9fb459cc20a49b16bad4bde0e5..95d8982f88b438658fa9fd5ed17ab7a38e0d863d 100644 (file)
@@ -164,6 +164,9 @@ bool                data_sync_retry = false;
 /* How SyncDataDirectory() should do its job. */
 int                    recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
+/* How data files should be bulk-extended with zeros. */
+int                    file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
+
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int                    io_direct_flags;
 
index f1ff257119bd2ae80162cc0264be39bb6aab75f7..5e4e209c49699f8b70a15edb9b52969acb3aee7f 100644 (file)
@@ -575,13 +575,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
                 * that decision should be made though? For now just use a cutoff of
                 * 8, anything between 4 and 8 worked OK in some local testing.
                 */
-               if (numblocks > 8)
+               if (numblocks > 8 &&
+                       file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
                {
-                       int                     ret;
+                       int                     ret = 0;
 
-                       ret = FileFallocate(v->mdfd_vfd,
-                                                               seekpos, (off_t) BLCKSZ * numblocks,
-                                                               WAIT_EVENT_DATA_FILE_EXTEND);
+#ifdef HAVE_POSIX_FALLOCATE
+                       if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
+                       {
+                               ret = FileFallocate(v->mdfd_vfd,
+                                                                       seekpos, (off_t) BLCKSZ * numblocks,
+                                                                       WAIT_EVENT_DATA_FILE_EXTEND);
+                       }
+                       else
+#endif
+                       {
+                               elog(ERROR, "unsupported file_extend_method: %d",
+                                        file_extend_method);
+                       }
                        if (ret != 0)
                        {
                                ereport(ERROR,
index a997dcb7dbcb7a7d6aaf2b2b96913214e5394048..6eccfa85428096d4c943aae7c3f8f127b6c5b5b7 100644 (file)
@@ -71,6 +71,7 @@
 #include "replication/slotsync.h"
 #include "replication/syncrep.h"
 #include "storage/bufmgr.h"
+#include "storage/fd.h"
 #include "storage/large_object.h"
 #include "storage/pg_shmem.h"
 #include "storage/predicate.h"
@@ -483,6 +484,14 @@ static const struct config_enum_entry wal_compression_options[] = {
        {NULL, 0, false}
 };
 
+static const struct config_enum_entry file_extend_method_options[] = {
+#ifdef HAVE_POSIX_FALLOCATE
+       {"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false},
+#endif
+       {"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false},
+       {NULL, 0, false}
+};
+
 /*
  * Options for enum values stored in other modules
  */
@@ -4991,6 +5000,16 @@ struct config_enum ConfigureNamesEnum[] =
                NULL, NULL, NULL
        },
 
+       {
+               {"file_extend_method", PGC_SIGHUP, RESOURCES_DISK,
+                       gettext_noop("Selects the method used for extending data files."),
+                       NULL
+               },
+               &file_extend_method,
+               DEFAULT_FILE_EXTEND_METHOD, file_extend_method_options,
+               NULL, NULL, NULL
+       },
+
        {
                {"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
                        gettext_noop("Selects the method used for forcing WAL updates to disk."),
index 7e8d3294ec3b70a387d44b381d11766b94939e50..6c761f83165e4c7ae0b894af16fd398fec8d2d04 100644 (file)
 #temp_file_limit = -1                  # limits per-process temp file space
                                        # in kilobytes, or -1 for no limit
 
+#file_extend_method = posix_fallocate   # the default is the first option supported
+                                       # by the operating system:
+                                       #   posix_fallocate (most Unix-like systems)
+                                       #   write_zeros
+
 #max_notify_queue_pages = 1048576      # limits the number of SLRU pages allocated
                                        # for NOTIFY / LISTEN queue
 
index 1456ab383a4240e41bc8c70dda539bde39083403..b709a860268d63e5745479346ff8fb4923d71a32 100644 (file)
@@ -55,12 +55,23 @@ typedef int File;
 #define IO_DIRECT_WAL                  0x02
 #define IO_DIRECT_WAL_INIT             0x04
 
+enum FileExtendMethod
+{
+#ifdef HAVE_POSIX_FALLOCATE
+       FILE_EXTEND_METHOD_POSIX_FALLOCATE,
+#endif
+       FILE_EXTEND_METHOD_WRITE_ZEROS,
+};
+
+/* Default to the first available file_extend_method. */
+#define DEFAULT_FILE_EXTEND_METHOD 0
 
 /* GUC parameter */
 extern PGDLLIMPORT int max_files_per_process;
 extern PGDLLIMPORT bool data_sync_retry;
 extern PGDLLIMPORT int recovery_init_sync_method;
 extern PGDLLIMPORT int io_direct_flags;
+extern PGDLLIMPORT int file_extend_method;
 
 /*
  * This is private to fd.c, but exported for save/restore_backend_variables()