From: Michael Paquier Date: Tue, 30 Dec 2025 06:38:50 +0000 (+0900) Subject: Add pg_get_multixact_stats() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=97b101776ce23dd6c4abbdae213806bc24ed6133;p=thirdparty%2Fpostgresql.git Add pg_get_multixact_stats() This new function exposes at SQL level some information related to multixacts, not available until now. This data is useful for monitoring purposes, especially for workloads that make a heavy use of multixacts: - num_mxids, number of MultiXact IDs in use. - num_members, number of member entries in use. - members_size, bytes used by num_members in pg_multixact/members/. - oldest_multixact: oldest MultiXact still needed. This patch has been originally proposed when MultiXactOffset was still 32 bits, to monitor wraparound. This part is not relevant anymore since bd8d9c9bdfa0 that has widen MultiXactOffset to 64 bits. The monitoring of disk space usage for the members is still relevant. Some tests are added to check this function, in the shape of one isolation test with concurrent transactions that take a ROW SHARE lock, and some SQL tests for pg_read_all_stats. Some documentation is added to explain some patterns that can come from the information provided by the function. Bump catalog version. Author: Naga Appani Reviewed-by: Ashutosh Bapat Reviewed-by: Michael Paquier Reviewed-by: Atsushi Torikoshi Discussion: https://postgr.es/m/CA+QeY+AAsYK6WvBW4qYzHz4bahHycDAY_q5ECmHkEV_eB9ckzg@mail.gmail.com --- diff --git a/doc/src/sgml/func/func-info.sgml b/doc/src/sgml/func/func-info.sgml index d4508114a48..175f18315cd 100644 --- a/doc/src/sgml/func/func-info.sgml +++ b/doc/src/sgml/func/func-info.sgml @@ -2975,6 +2975,39 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres} modify key columns. + + + + + pg_get_multixact_stats + + pg_get_multixact_stats () + record + ( num_mxids integer, + num_members bigint, + members_size bigint, + oldest_multixact xid ) + + + Returns statistics about current multixact usage: + num_mxids is the total number of multixact IDs + currently present in the system, num_members is + the total number of multixact member entries currently present in + the system, members_size is the storage occupied + by num_members in the + pg_multixact/members directory, + oldest_multixact is the oldest multixact ID still + in use. + + + The function reports statistics at the time it is invoked. Values may + vary between calls, even within a single transaction. + + + To use this function, you must have privileges of the + pg_read_all_stats role. + + diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 08e6489afb8..7c958b06273 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -813,12 +813,41 @@ HINT: Execute a database-wide VACUUM in that database. As a safety device, an aggressive vacuum scan will occur for any table whose multixact-age is greater than . Also, if the - storage occupied by multixacts members exceeds about 10GB, aggressive vacuum + linkend="guc-autovacuum-multixact-freeze-max-age"/>. Also, if the number + of multixact member entries created exceeds approximately 2 billion + entries (occupying roughly 10GB in the + pg_multixact/members directory), aggressive vacuum scans will occur more often for all tables, starting with those that - have the oldest multixact-age. Both of these kinds of aggressive - scans will occur even if autovacuum is nominally disabled. The members storage - area can grow up to about 20GB before reaching wraparound. + have the oldest multixact-age. Both of these kinds of aggressive + scans will occur even if autovacuum is nominally disabled. At approximately + 4 billion entries (occupying roughly 20GB in the + pg_multixact/members directory), even more aggressive + vacuum scans are triggered to reclaim member storage space. + + + + The pg_get_multixact_stats() function described in + provides a way to monitor + multixact allocation and usage patterns in real time, for example: + +=# SELECT *, pg_size_pretty(members_size) members_size_pretty + FROM pg_catalog.pg_get_multixact_stats(); + num_mxids | num_members | members_size | oldest_multixact | members_size_pretty +-----------+-------------+--------------+------------------+--------------------- + 311740299 | 2785241176 | 13926205880 | 2 | 13 GB +(1 row) + + This output shows a system with significant multixact activity: about + 312 million multixact IDs and about 2.8 billion member entries consuming + 13 GB of storage space. + A spike in num_mxids might indicate multiple sessions + running UPDATE statements with foreign key checks, + concurrent SELECT FOR SHARE operations, or frequent + use of savepoints causing lock contention. + If oldest_multixact value remains unchanged while + num_members grows, it could indicate that long-running + transactions are preventing cleanup, or autovacuum is + not keeping up with the workload. diff --git a/src/backend/utils/adt/multixactfuncs.c b/src/backend/utils/adt/multixactfuncs.c index a428e140bc4..97221614ffa 100644 --- a/src/backend/utils/adt/multixactfuncs.c +++ b/src/backend/utils/adt/multixactfuncs.c @@ -14,8 +14,13 @@ #include "postgres.h" +#include "access/htup_details.h" #include "access/multixact.h" +#include "access/multixact_internal.h" +#include "catalog/pg_authid_d.h" #include "funcapi.h" +#include "miscadmin.h" +#include "utils/acl.h" #include "utils/builtins.h" /* @@ -85,3 +90,51 @@ pg_get_multixact_members(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funccxt); } + +/* + * pg_get_multixact_stats + * + * Returns statistics about current multixact usage. + */ +Datum +pg_get_multixact_stats(PG_FUNCTION_ARGS) +{ + TupleDesc tupdesc; + Datum values[4]; + bool nulls[4]; + uint64 members; + MultiXactId oldestMultiXactId; + uint32 multixacts; + MultiXactOffset oldestOffset; + MultiXactOffset nextOffset; + uint64 membersBytes; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("return type must be a row type"))); + + GetMultiXactInfo(&multixacts, &nextOffset, &oldestMultiXactId, &oldestOffset); + members = nextOffset - oldestOffset; + + membersBytes = MultiXactOffsetStorageSize(nextOffset, oldestOffset); + + if (!has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS)) + { + /* + * Only superusers and roles with privileges of pg_read_all_stats can + * see details. + */ + memset(nulls, true, sizeof(bool) * tupdesc->natts); + } + else + { + values[0] = UInt32GetDatum(multixacts); + values[1] = Int64GetDatum(members); + values[2] = Int64GetDatum(membersBytes); + values[3] = UInt32GetDatum(oldestMultiXactId); + memset(nulls, false, sizeof(nulls)); + } + + return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)); +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index d1b2e1cd2d1..5f7886ab3ea 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202512093 +#define CATALOG_VERSION_NO 202512301 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index fd9448ec7b9..60f7ce502f6 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -6589,6 +6589,13 @@ proallargtypes => '{xid,xid,text}', proargmodes => '{i,o,o}', proargnames => '{multixid,xid,mode}', prosrc => 'pg_get_multixact_members' }, +{ oid => '9001', descr => 'get current multixact usage statistics', + proname => 'pg_get_multixact_stats', provolatile => 'v', + prorettype => 'record', proargtypes => '', + proallargtypes => '{int8,int8,int8,xid}', proargmodes => '{o,o,o,o}', + proargnames => '{num_mxids,num_members,members_size,oldest_multixact}', + prosrc => 'pg_get_multixact_stats' }, + { oid => '3581', descr => 'get commit timestamp of a transaction', proname => 'pg_xact_commit_timestamp', provolatile => 'v', prorettype => 'timestamptz', proargtypes => 'xid', diff --git a/src/test/isolation/expected/multixact-stats.out b/src/test/isolation/expected/multixact-stats.out new file mode 100644 index 00000000000..27a6510c4ad --- /dev/null +++ b/src/test/isolation/expected/multixact-stats.out @@ -0,0 +1,89 @@ +Parsed test spec with 2 sessions + +starting permutation: snap0 s1_begin s1_lock snap1 s2_begin s2_lock snap2 check_while_pinned s1_commit s2_commit +step snap0: + CREATE TEMP TABLE snap0 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); + +step s1_begin: BEGIN; +step s1_lock: SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; +?column? +-------- + 1 +(1 row) + +step snap1: + CREATE TEMP TABLE snap1 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); + +step s2_begin: BEGIN; +step s2_lock: SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; +?column? +-------- + 1 +(1 row) + +step snap2: + CREATE TEMP TABLE snap2 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); + +step check_while_pinned: + SELECT r.assertion, r.ok + FROM snap0 s0 + JOIN snap1 s1 ON TRUE + JOIN snap2 s2 ON TRUE, + LATERAL unnest( + ARRAY[ + 'is_init_mxids', + 'is_init_members', + 'is_init_oldest_mxid', + 'is_init_oldest_off', + 'is_oldest_mxid_nondec_01', + 'is_oldest_mxid_nondec_12', + 'is_oldest_off_nondec_01', + 'is_oldest_off_nondec_12', + 'is_members_increased_ge1', + 'is_mxids_nondec_01', + 'is_mxids_nondec_12', + 'is_members_nondec_01', + 'is_members_nondec_12' + ], + ARRAY[ + (s2.num_mxids IS NOT NULL), + (s2.num_members IS NOT NULL), + (s2.oldest_multixact IS NOT NULL), + + (s1.oldest_multixact::text::bigint >= COALESCE(s0.oldest_multixact::text::bigint, 0)), + (s2.oldest_multixact::text::bigint >= COALESCE(s1.oldest_multixact::text::bigint, 0)), + + (s2.num_members >= COALESCE(s1.num_members, 0) + 1), + + (s1.num_mxids >= COALESCE(s0.num_mxids, 0)), + (s2.num_mxids >= COALESCE(s1.num_mxids, 0)), + (s1.num_members >= COALESCE(s0.num_members, 0)), + (s2.num_members >= COALESCE(s1.num_members, 0)) + ] + ) AS r(assertion, ok); + +assertion |ok +------------------------+-- +is_init_mxids |t +is_init_members |t +is_init_oldest_mxid |t +is_init_oldest_off |t +is_oldest_mxid_nondec_01|t +is_oldest_mxid_nondec_12|t +is_oldest_off_nondec_01 |t +is_oldest_off_nondec_12 |t +is_members_increased_ge1|t +is_mxids_nondec_01 |t +is_mxids_nondec_12 | +is_members_nondec_01 | +is_members_nondec_12 | +(13 rows) + +step s1_commit: COMMIT; +step s2_commit: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index f2e067b1fbc..01ff1c6586f 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -63,6 +63,7 @@ test: delete-abort-savept-2 test: aborted-keyrevoke test: multixact-no-deadlock test: multixact-no-forget +test: multixact-stats test: lock-committed-update test: lock-committed-keyupdate test: update-locked-tuple diff --git a/src/test/isolation/specs/multixact-stats.spec b/src/test/isolation/specs/multixact-stats.spec new file mode 100644 index 00000000000..07d4b11be6d --- /dev/null +++ b/src/test/isolation/specs/multixact-stats.spec @@ -0,0 +1,111 @@ +# Test for pg_get_multixact_stats() +# +# This test creates one multixact on a brand-new table. While the multixact +# is pinned by two open transactions, we check some patterns that VACUUM and +# FREEZE cannot violate: +# 1) "members" increased by at least 1 when the second session locked the row. +# 2) (num_mxids / num_members) not decreased compared to earlier snapshots. +# 3) "oldest_*" fields never decreased. +# +# This test does not run checks after releasing locks, as freezing and/or +# truncation may shrink the multixact ranges calculated. + +setup +{ + CREATE TABLE mxq(id int PRIMARY KEY, v int); + INSERT INTO mxq VALUES (1, 42); +} + +teardown +{ + DROP TABLE mxq; +} + +# Two sessions that lock the same tuple, leading to one multixact with +# at least 2 members. +session "s1" +setup { SET client_min_messages = warning; SET lock_timeout = '5s'; } +step s1_begin { BEGIN; } +step s1_lock { SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; } +step s1_commit { COMMIT; } + +session "s2" +setup { SET client_min_messages = warning; SET lock_timeout = '5s'; } +step s2_begin { BEGIN; } +step s2_lock { SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; } +step s2_commit { COMMIT; } + +# Save multixact state *BEFORE* any locking; some of these may be NULLs if +# multixacts have not initialized yet. +step snap0 { + CREATE TEMP TABLE snap0 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); +} + +# Save multixact state after s1 has locked the row. +step snap1 { + CREATE TEMP TABLE snap1 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); +} + +# Save multixact state after s2 joins to lock the same row, leading to +# a multixact with at least 2 members. +step snap2 { + CREATE TEMP TABLE snap2 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); +} + +# Pretty, deterministic key/value outputs based of boolean checks: +# is_init_mxids : num_mxids not NULL +# is_init_members : num_members not NULL +# is_init_oldest_mxid : oldest_multixact not NULL +# is_oldest_mxid_nondec_01 : oldest_multixact not decreased (snap0->snap1) +# is_oldest_mxid_nondec_12 : oldest_multixact did not decreased (snap1->snap2) +# is_members_increased_ge1 : members increased by at least 1 when s2 joined +# is_mxids_nondec_01 : num_mxids not decreased (snap0->snap1) +# is_mxids_nondec_12 : num_mxids not decreased (snap1->snap2) +# is_members_nondec_01 : num_members not decreased (snap0->snap1) +# is_members_nondec_12 : num_members not decreased (snap1->snap2) +step check_while_pinned { + SELECT r.assertion, r.ok + FROM snap0 s0 + JOIN snap1 s1 ON TRUE + JOIN snap2 s2 ON TRUE, + LATERAL unnest( + ARRAY[ + 'is_init_mxids', + 'is_init_members', + 'is_init_oldest_mxid', + 'is_init_oldest_off', + 'is_oldest_mxid_nondec_01', + 'is_oldest_mxid_nondec_12', + 'is_oldest_off_nondec_01', + 'is_oldest_off_nondec_12', + 'is_members_increased_ge1', + 'is_mxids_nondec_01', + 'is_mxids_nondec_12', + 'is_members_nondec_01', + 'is_members_nondec_12' + ], + ARRAY[ + (s2.num_mxids IS NOT NULL), + (s2.num_members IS NOT NULL), + (s2.oldest_multixact IS NOT NULL), + + (s1.oldest_multixact::text::bigint >= COALESCE(s0.oldest_multixact::text::bigint, 0)), + (s2.oldest_multixact::text::bigint >= COALESCE(s1.oldest_multixact::text::bigint, 0)), + + (s2.num_members >= COALESCE(s1.num_members, 0) + 1), + + (s1.num_mxids >= COALESCE(s0.num_mxids, 0)), + (s2.num_mxids >= COALESCE(s1.num_mxids, 0)), + (s1.num_members >= COALESCE(s0.num_members, 0)), + (s2.num_members >= COALESCE(s1.num_members, 0)) + ] + ) AS r(assertion, ok); +} + +permutation snap0 s1_begin s1_lock snap1 s2_begin s2_lock snap2 check_while_pinned s1_commit s2_commit diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index d7d965d884a..6c03b1a79d7 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -999,3 +999,32 @@ SELECT test_relpath(); SELECT pg_replication_origin_create('regress_' || repeat('a', 505)); ERROR: replication origin name is too long DETAIL: Replication origin names must be no longer than 512 bytes. +-- pg_get_multixact_stats tests +CREATE ROLE regress_multixact_funcs; +-- Access granted for superusers. +SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats(); + null_result +------------- + f +(1 row) + +-- Access revoked. +SET ROLE regress_multixact_funcs; +SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats(); + null_result +------------- + t +(1 row) + +RESET ROLE; +-- Access granted for users with pg_monitor rights. +GRANT pg_monitor TO regress_multixact_funcs; +SET ROLE regress_multixact_funcs; +SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats(); + null_result +------------- + f +(1 row) + +RESET ROLE; +DROP ROLE regress_multixact_funcs; diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 0fc20fbb6b4..35b7983996c 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -459,3 +459,18 @@ SELECT test_relpath(); -- pg_replication_origin.roname limit SELECT pg_replication_origin_create('regress_' || repeat('a', 505)); + +-- pg_get_multixact_stats tests +CREATE ROLE regress_multixact_funcs; +-- Access granted for superusers. +SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats(); +-- Access revoked. +SET ROLE regress_multixact_funcs; +SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats(); +RESET ROLE; +-- Access granted for users with pg_monitor rights. +GRANT pg_monitor TO regress_multixact_funcs; +SET ROLE regress_multixact_funcs; +SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats(); +RESET ROLE; +DROP ROLE regress_multixact_funcs;