Provide a way to disable the use of posix_fallocate() for relation
files. It was introduced by commit
4d330a61bb1. The new setting
file_extend_method=write_zeros can be used as a workaround for problems
reported from the field:
* BTRFS compression is disabled by the use of posix_fallocate()
* XFS could produce spurious ENOSPC errors in some Linux kernel
versions, though that problem is reported to have been fixed
The default is file_extend_method=posix_fallocate if available, as
before. The write_zeros option is similar to PostgreSQL < 16, except
that now it's multi-block.
Backpatch-through: 16
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reported-by: Dimitrios Apostolou <jimis@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
</listitem>
</varlistentry>
+ <varlistentry id="guc-file-extend-method" xreflabel="file_extend_method">
+ <term><varname>file_extend_method</varname> (<type>enum</type>)
+ <indexterm>
+ <primary><varname>file_extend_method</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the method used to extend data files during bulk operations
+ such as <command>COPY</command>. The first available option is used as
+ the default, depending on the operating system:
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>posix_fallocate</literal> (Unix) uses the standard POSIX
+ interface for allocating disk space, but is missing on some systems.
+ If it is present but the underlying file system doesn't support it,
+ this option silently falls back to <literal>write_zeros</literal>.
+ Current versions of BTRFS are known to disable compression when
+ this option is used.
+ This is the default on systems that have the function.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>write_zeros</literal> extends files by writing out blocks
+ of zero bytes. This is the default on systems that don't have the
+ function <function>posix_fallocate</function>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ The <literal>write_zeros</literal> method is always used when data
+ files are extended by 8 blocks or fewer.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
<indexterm>
/* How SyncDataDirectory() should do its job. */
int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+/* How data files should be bulk-extended with zeros. */
+int file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
+
/* Which kinds of files should be opened with PG_O_DIRECT. */
int io_direct_flags;
* that decision should be made though? For now just use a cutoff of
* 8, anything between 4 and 8 worked OK in some local testing.
*/
- if (numblocks > 8)
+ if (numblocks > 8 &&
+ file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
{
- int ret;
+ int ret = 0;
- ret = FileFallocate(v->mdfd_vfd,
- seekpos, (pgoff_t) BLCKSZ * numblocks,
- WAIT_EVENT_DATA_FILE_EXTEND);
+#ifdef HAVE_POSIX_FALLOCATE
+ if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
+ {
+ ret = FileFallocate(v->mdfd_vfd,
+ seekpos, (pgoff_t) BLCKSZ * numblocks,
+ WAIT_EVENT_DATA_FILE_EXTEND);
+ }
+ else
+#endif
+ {
+ elog(ERROR, "unsupported file_extend_method: %d",
+ file_extend_method);
+ }
if (ret != 0)
{
ereport(ERROR,
options => 'file_copy_method_options',
},
+{ name => 'file_extend_method', type => 'enum', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK',
+ short_desc => 'Selects the method used for extending data files.',
+ variable => 'file_extend_method',
+ boot_val => 'DEFAULT_FILE_EXTEND_METHOD',
+ options => 'file_extend_method_options',
+},
+
{ name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/copydir.h"
+#include "storage/fd.h"
#include "storage/io_worker.h"
#include "storage/large_object.h"
#include "storage/pg_shmem.h"
{NULL, 0, false}
};
+static const struct config_enum_entry file_extend_method_options[] = { /* accepted values for the file_extend_method setting */
+#ifdef HAVE_POSIX_FALLOCATE
+ {"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false}, /* listed only when HAVE_POSIX_FALLOCATE is defined */
+#endif
+ {"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false}, /* listed unconditionally */
+ {NULL, 0, false} /* NULL-name entry closing the list, same shape as the table just above; presumably the terminator -- confirm */
+};
+
/*
* Options for enum values stored in other modules
*/
# in kilobytes, or -1 for no limit
#file_copy_method = copy # copy, clone (if supported by OS)
+#file_extend_method = posix_fallocate # the default is the first option supported
+ # by the operating system:
+ # posix_fallocate (most Unix-like systems)
+ # write_zeros
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
# for NOTIFY / LISTEN queue
#define IO_DIRECT_WAL 0x02
#define IO_DIRECT_WAL_INIT 0x04
+enum FileExtendMethod
+{
+#ifdef HAVE_POSIX_FALLOCATE
+ FILE_EXTEND_METHOD_POSIX_FALLOCATE, /* exists only when HAVE_POSIX_FALLOCATE is defined; it is then value 0 */
+#endif
+ FILE_EXTEND_METHOD_WRITE_ZEROS, /* always present; becomes value 0 when the member above is compiled out. Keep this order: DEFAULT_FILE_EXTEND_METHOD is the literal 0 */
+};
+
+/* Default to the first available file_extend_method. */
+#define DEFAULT_FILE_EXTEND_METHOD 0
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
extern PGDLLIMPORT int recovery_init_sync_method;
extern PGDLLIMPORT int io_direct_flags;
+extern PGDLLIMPORT int file_extend_method;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()