From: Thomas Munro Date: Sat, 31 May 2025 10:50:22 +0000 (+1200) Subject: Add file_extend_method=posix_fallocate,write_zeros. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f94e9141a0bbb365f8194517e142746466ee7014;p=thirdparty%2Fpostgresql.git Add file_extend_method=posix_fallocate,write_zeros. Provide a way to disable the use of posix_fallocate() for relation files. It was introduced by commit 4d330a61bb1. The new setting file_extend_method=write_zeros can be used as a workaround for problems reported from the field: * BTRFS compression is disabled by the use of posix_fallocate() * XFS could produce spurious ENOSPC errors in some Linux kernel versions, though that problem is reported to have been fixed The default is file_extend_method=posix_fallocate if available, as before. The write_zeros option is similar to PostgreSQL < 16, except that now it's multi-block. Backpatch-through: 16 Reviewed-by: Jakub Wartak Reported-by: Dimitrios Apostolou Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net --- diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 0cacc062cdd..f1af1505cf3 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2412,6 +2412,43 @@ include_dir 'conf.d' + + file_extend_method (enum) + + file_extend_method configuration parameter + + + + + Specifies the method used to extend data files during bulk operations + such as COPY. The first available option is used as + the default, depending on the operating system: + + + + posix_fallocate (Unix) uses the standard POSIX + interface for allocating disk space, but is missing on some systems. + If it is present but the underlying file system doesn't support it, + this option silently falls back to write_zeros. + Current versions of BTRFS are known to disable compression when + this option is used. + This is the default on systems that have the function. + + + + + write_zeros extends files by writing out blocks + of zero bytes. This is the default on systems that don't have the + function posix_fallocate. + + + + The write_zeros method is always used when data + files are extended by 8 blocks or fewer. + + + + max_notify_queue_pages (integer) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 0f8083651de..5d07b64a1ef 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -164,6 +164,9 @@ bool data_sync_retry = false; /* How SyncDataDirectory() should do its job. */ int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC; +/* How data files should be bulk-extended with zeros. */ +int file_extend_method = DEFAULT_FILE_EXTEND_METHOD; + /* Which kinds of files should be opened with PG_O_DIRECT. */ int io_direct_flags; diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index a2625871185..443434e4ea8 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -602,13 +602,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, * that decision should be made though? For now just use a cutoff of * 8, anything between 4 and 8 worked OK in some local testing. */ - if (numblocks > 8) + if (numblocks > 8 && + file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS) { - int ret; + int ret = 0; - ret = FileFallocate(v->mdfd_vfd, - seekpos, (pgoff_t) BLCKSZ * numblocks, - WAIT_EVENT_DATA_FILE_EXTEND); +#ifdef HAVE_POSIX_FALLOCATE + if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE) + { + ret = FileFallocate(v->mdfd_vfd, + seekpos, (pgoff_t) BLCKSZ * numblocks, + WAIT_EVENT_DATA_FILE_EXTEND); + } + else +#endif + { + elog(ERROR, "unsupported file_extend_method: %d", + file_extend_method); + } if (ret != 0) { ereport(ERROR, diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index f0260e6e412..c1f1603cd39 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -1042,6 +1042,13 @@ options => 'file_copy_method_options', }, +{ name => 'file_extend_method', type => 'enum', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK', + short_desc => 'Selects the method used for extending data files.', + variable => 'file_extend_method', + boot_val => 'DEFAULT_FILE_EXTEND_METHOD', + options => 'file_extend_method_options', +}, + { name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER', short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.', long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.', diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 13c569d8790..5df3a36bf64 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -80,6 +80,7 @@ #include "storage/bufmgr.h" #include "storage/bufpage.h" #include "storage/copydir.h" +#include "storage/fd.h" #include "storage/io_worker.h" #include "storage/large_object.h" #include "storage/pg_shmem.h" @@ -491,6 +492,14 @@ static const struct config_enum_entry file_copy_method_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry file_extend_method_options[] = { +#ifdef HAVE_POSIX_FALLOCATE + {"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false}, +#endif + {"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index c4f92fcdac8..1ae594af843 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -179,6 +179,10 @@ # in kilobytes, or -1 for no limit #file_copy_method = copy # copy, clone (if supported by OS) +#file_extend_method = posix_fallocate # the default is the first option supported + # by the operating system: + # posix_fallocate (most Unix-like systems) + # write_zeros #max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated # for NOTIFY / LISTEN queue diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 413233bcd39..8ac466fd346 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -55,12 +55,23 @@ typedef int File; #define IO_DIRECT_WAL 0x02 #define IO_DIRECT_WAL_INIT 0x04 +enum FileExtendMethod +{ +#ifdef HAVE_POSIX_FALLOCATE + FILE_EXTEND_METHOD_POSIX_FALLOCATE, +#endif + FILE_EXTEND_METHOD_WRITE_ZEROS, +}; + +/* Default to the first available file_extend_method. */ +#define DEFAULT_FILE_EXTEND_METHOD 0 /* GUC parameter */ extern PGDLLIMPORT int max_files_per_process; extern PGDLLIMPORT bool data_sync_retry; extern PGDLLIMPORT int recovery_init_sync_method; extern PGDLLIMPORT int io_direct_flags; +extern PGDLLIMPORT int file_extend_method; /* * This is private to fd.c, but exported for save/restore_backend_variables()