From: Michael Paquier Date: Tue, 27 Jan 2026 04:42:32 +0000 (+0900) Subject: Include extended statistics data in pg_dump X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c32fb29e979db4a7b92adb29007725eeacf91f64;p=thirdparty%2Fpostgresql.git Include extended statistics data in pg_dump This commit integrates the new pg_restore_extended_stats() function into pg_dump, so as the data of extended statistics is detected and included in dumps when the --statistics switch is specified. Currently, the same extended stats kinds as the ones supported by the SQL function can be dumped: "n_distinct" and "dependencies". The extended statistics data can be dumped down to PostgreSQL 10, with the following changes depending on the backend version dealt with: - In v19 and newer versions, the format of pg_ndistinct and pg_dependencies has changed, catalogs can be directly queried. - In v18 and older versions, the format is translated to the new format supported by the backend. - In v14 and older versions, inherited extended statistics are not supported. - In v11 and older versions, the data for ndistinct and dependencies was stored in pg_statistic_ext. These have been moved to pg_stats_ext in v12. - Extended Statistics have been introduced in v10, no support is needed for versions older than that. The extended statistics data is dumped if it can be found in the catalogs. If the catalogs are empty, then no restore of the stats data is attempted. Author: Corey Huinker Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/CADkLM=dpz3KFnqP-dgJ-zvRvtjsa8UZv8wDAQdqho=qN3kX0Zg@mail.gmail.com --- diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index d9041dad720..2f8d9799c30 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -68,6 +68,7 @@ enum _dumpPreparedQueries PREPQUERY_DUMPCOMPOSITETYPE, PREPQUERY_DUMPDOMAIN, PREPQUERY_DUMPENUMTYPE, + PREPQUERY_DUMPEXTSTATSOBJSTATS, PREPQUERY_DUMPFUNC, PREPQUERY_DUMPOPR, PREPQUERY_DUMPRANGETYPE, diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 18d3822fd82..35d3a07915d 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -3007,7 +3007,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) strcmp(te->desc, "SEARCHPATH") == 0) return REQ_SPECIAL; - if (strcmp(te->desc, "STATISTICS DATA") == 0) + if ((strcmp(te->desc, "STATISTICS DATA") == 0) || + (strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0)) { if (!ropt->dumpStatistics) return 0; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index fff929b26df..078ee8500ad 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -71,6 +71,7 @@ #include "pg_backup_db.h" #include "pg_backup_utils.h" #include "pg_dump.h" +#include "statistics/statistics_format.h" #include "storage/block.h" typedef struct @@ -325,6 +326,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, const IndxInfo *indxinfo); static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo); static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo); +static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo); static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo); @@ -8284,6 +8286,9 @@ getExtendedStatistics(Archive *fout) /* Decide whether we want to dump it */ selectDumpableStatisticsObject(&(statsextinfo[i]), fout); + + if (fout->dopt->dumpStatistics) + statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS; } PQclear(res); @@ -11738,6 +11743,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) break; case DO_STATSEXT: dumpStatisticsExt(fout, (const StatsExtInfo *) dobj); + dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj); break; case DO_REFRESH_MATVIEW: refreshMatViewData(fout, (const TableDataInfo *) dobj); @@ -18540,6 +18546,209 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo) free(qstatsextname); } +/* + * dumpStatisticsExtStats + * write out to fout the stats for an extended statistics object + */ +static void +dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo) +{ + DumpOptions *dopt = fout->dopt; + PQExpBuffer query; + PGresult *res; + int nstats; + + /* Do nothing if not dumping statistics */ + if (!dopt->dumpStatistics) + return; + + if (!fout->is_prepared[PREPQUERY_DUMPEXTSTATSOBJSTATS]) + { + PQExpBuffer pq = createPQExpBuffer(); + + /*--------- + * Set up query for details about extended statistics objects. + * + * The query depends on the backend version: + * - In v19 and newer versions, query directly the pg_stats_ext* + * catalogs. + * - In v18 and older versions, ndistinct and dependencies have a + * different format that needs translation. + * - In v14 and older versions, inherited does not exist. + * - In v11 and older versions, there is no pg_stats_ext, hence + * the logic joins pg_statistic_ext and pg_namespace. + *--------- + */ + + appendPQExpBufferStr(pq, + "PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n" + "SELECT "); + + /* + * Versions 15 and newer have inherited stats. + * + * Create this column in all versions because we need to order by it + * later. + */ + if (fout->remoteVersion >= 150000) + appendPQExpBufferStr(pq, "e.inherited, "); + else + appendPQExpBufferStr(pq, "false AS inherited, "); + + /*-------- + * The ndistinct and dependencies formats changed in v19, so + * everything before that needs to be translated. + * + * The ndistinct translation converts this kind of data: + * {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11} + * + * to this: + * [ {"attributes": [3,4], "ndistinct": 11}, + * {"attributes": [3,6], "ndistinct": 11}, + * {"attributes": [4,6], "ndistinct": 11}, + * {"attributes": [3,4,6], "ndistinct": 11} ] + * + * The dependencies translation converts this kind of data: + * {"3 => 4": 1.000000, "3 => 6": 1.000000, + * "4 => 6": 1.000000, "3, 4 => 6": 1.000000, + * "3, 6 => 4": 1.000000} + * + * to this: + * [ {"attributes": [3], "dependency": 4, "degree": 1.000000}, + * {"attributes": [3], "dependency": 6, "degree": 1.000000}, + * {"attributes": [4], "dependency": 6, "degree": 1.000000}, + * {"attributes": [3,4], "dependency": 6, "degree": 1.000000}, + * {"attributes": [3,6], "dependency": 4, "degree": 1.000000} ] + *-------- + */ + if (fout->remoteVersion >= 190000) + appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies "); + else + appendPQExpBufferStr(pq, + "( " + "SELECT json_agg( " + " json_build_object( " + " '" PG_NDISTINCT_KEY_ATTRIBUTES "', " + " string_to_array(kv.key, ', ')::integer[], " + " '" PG_NDISTINCT_KEY_NDISTINCT "', " + " kv.value::bigint )) " + "FROM json_each_text(e.n_distinct::text::json) AS kv" + ") AS n_distinct, " + "( " + "SELECT json_agg( " + " json_build_object( " + " '" PG_DEPENDENCIES_KEY_ATTRIBUTES "', " + " string_to_array( " + " split_part(kv.key, ' => ', 1), " + " ', ')::integer[], " + " '" PG_DEPENDENCIES_KEY_DEPENDENCY "', " + " split_part(kv.key, ' => ', 2)::integer, " + " '" PG_DEPENDENCIES_KEY_DEGREE "', " + " kv.value::double precision )) " + "FROM json_each_text(e.dependencies::text::json) AS kv " + ") AS dependencies "); + + /* pg_stats_ext introduced in v12 */ + if (fout->remoteVersion >= 120000) + appendPQExpBufferStr(pq, + "FROM pg_catalog.pg_stats_ext AS e " + "WHERE e.statistics_schemaname = $1 " + "AND e.statistics_name = $2 "); + else + appendPQExpBufferStr(pq, + "FROM ( " + "SELECT s.stxndistinct AS n_distinct, " + " s.stxdependencies AS dependencies " + "FROM pg_catalog.pg_statistic_ext AS s " + "JOIN pg_catalog.pg_namespace AS n " + "ON n.oid = s.stxnamespace " + "WHERE n.nspname = $1 " + "AND s.stxname = $2 " + ") AS e "); + + /* we always have an inherited column, but it may be a constant */ + appendPQExpBufferStr(pq, "ORDER BY inherited"); + + ExecuteSqlStatement(fout, pq->data); + + fout->is_prepared[PREPQUERY_DUMPEXTSTATSOBJSTATS] = true; + + destroyPQExpBuffer(pq); + } + + query = createPQExpBuffer(); + + appendPQExpBufferStr(query, "EXECUTE getExtStatsStats("); + appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout); + appendPQExpBufferStr(query, "::pg_catalog.name, "); + appendStringLiteralAH(query, statsextinfo->dobj.name, fout); + appendPQExpBufferStr(query, "::pg_catalog.name)"); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + destroyPQExpBuffer(query); + + nstats = PQntuples(res); + + if (nstats > 0) + { + PQExpBuffer out = createPQExpBuffer(); + + int i_inherited = PQfnumber(res, "inherited"); + int i_ndistinct = PQfnumber(res, "n_distinct"); + int i_dependencies = PQfnumber(res, "dependencies"); + + for (int i = 0; i < nstats; i++) + { + TableInfo *tbinfo = statsextinfo->stattable; + + if (PQgetisnull(res, i, i_inherited)) + pg_fatal("inherited cannot be NULL"); + + appendPQExpBufferStr(out, + "SELECT * FROM pg_catalog.pg_restore_extended_stats(\n"); + appendPQExpBuffer(out, "\t'version', '%d'::integer,\n", + fout->remoteVersion); + + /* Relation information */ + appendPQExpBufferStr(out, "\t'schemaname', "); + appendStringLiteralAH(out, tbinfo->dobj.namespace->dobj.name, fout); + appendPQExpBufferStr(out, ",\n\t'relname', "); + appendStringLiteralAH(out, tbinfo->dobj.name, fout); + + /* Extended statistics information */ + appendPQExpBufferStr(out, ",\n\t'statistics_schemaname', "); + appendStringLiteralAH(out, statsextinfo->dobj.namespace->dobj.name, fout); + appendPQExpBufferStr(out, ",\n\t'statistics_name', "); + appendStringLiteralAH(out, statsextinfo->dobj.name, fout); + appendNamedArgument(out, fout, "inherited", "boolean", + PQgetvalue(res, i, i_inherited)); + + if (!PQgetisnull(res, i, i_ndistinct)) + appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct", + PQgetvalue(res, i, i_ndistinct)); + + if (!PQgetisnull(res, i, i_dependencies)) + appendNamedArgument(out, fout, "dependencies", "pg_dependencies", + PQgetvalue(res, i, i_dependencies)); + + appendPQExpBufferStr(out, "\n);\n"); + } + + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = statsextinfo->dobj.name, + .namespace = statsextinfo->dobj.namespace->dobj.name, + .owner = statsextinfo->rolname, + .description = "EXTENDED STATISTICS DATA", + .section = SECTION_POST_DATA, + .createStmt = out->data, + .deps = &statsextinfo->dobj.dumpId, + .nDeps = 1)); + destroyPQExpBuffer(out); + } + PQclear(res); +} + /* * dumpConstraint * write out to fout a user-defined constraint diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 28812d28aa9..a8dcc2b5c75 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -4772,6 +4772,34 @@ my %tests = ( }, }, + # + # EXTENDED stats will end up in SECTION_POST_DATA. + # + 'extended_statistics_import' => { + create_sql => ' + CREATE TABLE dump_test.has_ext_stats + AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g); + CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats; + ANALYZE dump_test.has_ext_stats;', + regexp => qr/^ + \QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm, + like => { + %full_runs, + %dump_test_schema_runs, + no_data_no_schema => 1, + no_schema => 1, + section_post_data => 1, + statistics_only => 1, + schema_only_with_statistics => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + no_statistics => 1, + only_dump_measurement => 1, + schema_only => 1, + }, + }, + # # While attribute stats (aka pg_statistic stats) only appear for tables # that have been analyzed, all tables will have relation stats because diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm index b8e641cc1cb..5cc7a0b50ae 100644 --- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm +++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm @@ -353,8 +353,8 @@ sub adjust_old_dumpfile # Version comments will certainly not match. $dump =~ s/^-- Dumped from database version.*\n//mg; - # Same with version argument to pg_restore_relation_stats() or - # pg_restore_attribute_stats(). + # Same with version argument to pg_restore_relation_stats(), + # pg_restore_attribute_stats() or pg_restore_extended_stats(). $dump =~ s {\n(\s+'version',) '\d+'::integer,$} {$1 '000000'::integer,}mg; @@ -703,8 +703,8 @@ sub adjust_new_dumpfile # Version comments will certainly not match. $dump =~ s/^-- Dumped from database version.*\n//mg; - # Same with version argument to pg_restore_relation_stats() or - # pg_restore_attribute_stats(). + # Same with version argument to pg_restore_relation_stats(), + # pg_restore_attribute_stats() or pg_restore_extended_stats(). $dump =~ s {\n(\s+'version',) '\d+'::integer,$} {$1 '000000'::integer,}mg;