]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Include extended statistics data in pg_dump
authorMichael Paquier <michael@paquier.xyz>
Tue, 27 Jan 2026 04:42:32 +0000 (13:42 +0900)
committerMichael Paquier <michael@paquier.xyz>
Tue, 27 Jan 2026 04:42:32 +0000 (13:42 +0900)
This commit integrates the new pg_restore_extended_stats() function into
pg_dump, so that the data of extended statistics is detected and included
in dumps when the --statistics switch is specified.  Currently, the same
extended stats kinds as the ones supported by the SQL function can be
dumped: "n_distinct" and "dependencies".

The extended statistics data can be dumped down to PostgreSQL 10, with
the following changes depending on the backend version dealt with:
- In v19 and newer versions, the format of pg_ndistinct and
pg_dependencies has changed, so the catalogs can be queried directly.
- In v18 and older versions, the format is translated to the new format
supported by the backend.
- In v14 and older versions, inherited extended statistics are not
supported.
- In v11 and older versions, the data for ndistinct and dependencies
was stored in pg_statistic_ext.  These have been moved to pg_stats_ext
in v12.
- Extended statistics were introduced in v10, so no support is needed
for versions older than that.

The extended statistics data is dumped if it can be found in the
catalogs.  If the catalogs are empty, then no restore of the stats data
is attempted.

Author: Corey Huinker <corey.huinker@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/CADkLM=dpz3KFnqP-dgJ-zvRvtjsa8UZv8wDAQdqho=qN3kX0Zg@mail.gmail.com

src/bin/pg_dump/pg_backup.h
src/bin/pg_dump/pg_backup_archiver.c
src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/t/002_pg_dump.pl
src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm

index d9041dad72068a0e6dc3205aa7cf2c0444d29f8a..2f8d9799c30c09fad7cdfceb4f3df5d87b7caa7d 100644 (file)
@@ -68,6 +68,7 @@ enum _dumpPreparedQueries
        PREPQUERY_DUMPCOMPOSITETYPE,
        PREPQUERY_DUMPDOMAIN,
        PREPQUERY_DUMPENUMTYPE,
+       PREPQUERY_DUMPEXTSTATSOBJSTATS,
        PREPQUERY_DUMPFUNC,
        PREPQUERY_DUMPOPR,
        PREPQUERY_DUMPRANGETYPE,
index 18d3822fd824e5a6586bbc41c7b6056159a2cfb2..35d3a07915d0757f5f26ba6d73339d5cb71dd8a5 100644 (file)
@@ -3007,7 +3007,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
                strcmp(te->desc, "SEARCHPATH") == 0)
                return REQ_SPECIAL;
 
-       if (strcmp(te->desc, "STATISTICS DATA") == 0)
+       if ((strcmp(te->desc, "STATISTICS DATA") == 0) ||
+               (strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0))
        {
                if (!ropt->dumpStatistics)
                        return 0;
index fff929b26dfb58b1f19885549f1592a64a86b433..078ee8500ad290618a47c859bcac1008cb36d61f 100644 (file)
@@ -71,6 +71,7 @@
 #include "pg_backup_db.h"
 #include "pg_backup_utils.h"
 #include "pg_dump.h"
+#include "statistics/statistics_format.h"
 #include "storage/block.h"
 
 typedef struct
@@ -325,6 +326,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, const IndxInfo *indxinfo);
 static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo);
 static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo);
+static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo);
 static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo);
 static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo);
 static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo);
@@ -8284,6 +8286,9 @@ getExtendedStatistics(Archive *fout)
 
                /* Decide whether we want to dump it */
                selectDumpableStatisticsObject(&(statsextinfo[i]), fout);
+
+               if (fout->dopt->dumpStatistics)
+                       statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS;
        }
 
        PQclear(res);
@@ -11738,6 +11743,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
                        break;
                case DO_STATSEXT:
                        dumpStatisticsExt(fout, (const StatsExtInfo *) dobj);
+                       dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj);
                        break;
                case DO_REFRESH_MATVIEW:
                        refreshMatViewData(fout, (const TableDataInfo *) dobj);
@@ -18540,6 +18546,209 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo)
        free(qstatsextname);
 }
 
+/*
+ * dumpStatisticsExtStats
+ *       write out to fout the stats for an extended statistics object
+ *
+ * Does nothing unless dopt->dumpStatistics is set.  Otherwise, the
+ * version-dependent query "getExtStatsStats" is prepared on first use and
+ * then executed with the schema name and the name of the given statistics
+ * object.  Each row returned produces one pg_restore_extended_stats() call;
+ * all the calls generated for the object are stored in a single
+ * "EXTENDED STATISTICS DATA" archive entry, placed in SECTION_POST_DATA and
+ * depending on the dump ID of the statistics object.  No archive entry is
+ * created when the query returns no rows.
+ */
+static void
+dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo)
+{
+       DumpOptions *dopt = fout->dopt;
+       PQExpBuffer query;
+       PGresult   *res;
+       int                     nstats;
+
+       /* Do nothing if not dumping statistics */
+       if (!dopt->dumpStatistics)
+               return;
+
+       if (!fout->is_prepared[PREPQUERY_DUMPEXTSTATSOBJSTATS])
+       {
+               PQExpBuffer pq = createPQExpBuffer();
+
+               /*---------
+                * Set up query for details about extended statistics objects.
+                *
+                * This is done only the first time through for a given fout, as
+                * tracked by fout->is_prepared[]; later calls just EXECUTE the
+                * prepared statement.
+                *
+                * The query depends on the backend version:
+                * - In v19 and newer versions, query directly the pg_stats_ext*
+                *   catalogs.
+                * - In v18 and older versions, ndistinct and dependencies have a
+                *   different format that needs translation.
+                * - In v14 and older versions, inherited does not exist.
+                * - In v11 and older versions, there is no pg_stats_ext, hence
+                *   the logic joins pg_statistic_ext and pg_namespace.
+                *---------
+                */
+
+               appendPQExpBufferStr(pq,
+                                                        "PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n"
+                                                        "SELECT ");
+
+               /*
+                * Versions 15 and newer have inherited stats.
+                *
+                * Create this column in all versions because we need to order by it
+                * later.  Older versions get a constant "false" under the same
+                * column name.
+                */
+               if (fout->remoteVersion >= 150000)
+                       appendPQExpBufferStr(pq, "e.inherited, ");
+               else
+                       appendPQExpBufferStr(pq, "false AS inherited, ");
+
+               /*--------
+                * The ndistinct and dependencies formats changed in v19, so
+                * everything before that needs to be translated.
+                *
+                * The ndistinct translation converts this kind of data:
+                * {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
+                *
+                * to this:
+                * [ {"attributes": [3,4], "ndistinct": 11},
+                *   {"attributes": [3,6], "ndistinct": 11},
+                *   {"attributes": [4,6], "ndistinct": 11},
+                *   {"attributes": [3,4,6], "ndistinct": 11} ]
+                *
+                * The dependencies translation converts this kind of data:
+                * {"3 => 4": 1.000000, "3 => 6": 1.000000,
+                *  "4 => 6": 1.000000, "3, 4 => 6": 1.000000,
+                *  "3, 6 => 4": 1.000000}
+                *
+                * to this:
+                * [ {"attributes": [3], "dependency": 4, "degree": 1.000000},
+                *   {"attributes": [3], "dependency": 6, "degree": 1.000000},
+                *   {"attributes": [4], "dependency": 6, "degree": 1.000000},
+                *   {"attributes": [3,4], "dependency": 6, "degree": 1.000000},
+                *   {"attributes": [3,6], "dependency": 4, "degree": 1.000000} ]
+                *
+                * The translation is done on the source server, in SQL: the old
+                * value is cast to text and then to json, split with
+                * json_each_text(), and rebuilt with json_build_object() and
+                * json_agg().  The key names come from the PG_NDISTINCT_KEY_* and
+                * PG_DEPENDENCIES_KEY_* macros.
+                *--------
+                */
+               if (fout->remoteVersion >= 190000)
+                       appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies ");
+               else
+                       appendPQExpBufferStr(pq,
+                                                                "( "
+                                                                "SELECT json_agg( "
+                                                                "  json_build_object( "
+                                                                "    '" PG_NDISTINCT_KEY_ATTRIBUTES "', "
+                                                                "    string_to_array(kv.key, ', ')::integer[], "
+                                                                "    '" PG_NDISTINCT_KEY_NDISTINCT "', "
+                                                                "    kv.value::bigint )) "
+                                                                "FROM json_each_text(e.n_distinct::text::json) AS kv"
+                                                                ") AS n_distinct, "
+                                                                "( "
+                                                                "SELECT json_agg( "
+                                                                "  json_build_object( "
+                                                                "    '" PG_DEPENDENCIES_KEY_ATTRIBUTES "', "
+                                                                "    string_to_array( "
+                                                                "      split_part(kv.key, ' => ', 1), "
+                                                                "      ', ')::integer[], "
+                                                                "    '" PG_DEPENDENCIES_KEY_DEPENDENCY "', "
+                                                                "    split_part(kv.key, ' => ', 2)::integer, "
+                                                                "    '" PG_DEPENDENCIES_KEY_DEGREE "', "
+                                                                "    kv.value::double precision )) "
+                                                                "FROM json_each_text(e.dependencies::text::json) AS kv "
+                                                                ") AS dependencies ");
+
+               /*
+                * pg_stats_ext was introduced in v12.  For older servers, read
+                * pg_statistic_ext joined with pg_namespace instead, aliasing its
+                * columns to the names referenced by the SELECT list built above.
+                */
+               if (fout->remoteVersion >= 120000)
+                       appendPQExpBufferStr(pq,
+                                                                "FROM pg_catalog.pg_stats_ext AS e "
+                                                                "WHERE e.statistics_schemaname = $1 "
+                                                                "AND e.statistics_name = $2 ");
+               else
+                       appendPQExpBufferStr(pq,
+                                                                "FROM ( "
+                                                                "SELECT s.stxndistinct AS n_distinct, "
+                                                                "    s.stxdependencies AS dependencies "
+                                                                "FROM pg_catalog.pg_statistic_ext AS s "
+                                                                "JOIN pg_catalog.pg_namespace AS n "
+                                                                "ON n.oid = s.stxnamespace "
+                                                                "WHERE n.nspname = $1 "
+                                                                "AND s.stxname = $2 "
+                                                                ") AS e ");
+
+               /* we always have an inherited column, but it may be a constant */
+               appendPQExpBufferStr(pq, "ORDER BY inherited");
+
+               ExecuteSqlStatement(fout, pq->data);
+
+               fout->is_prepared[PREPQUERY_DUMPEXTSTATSOBJSTATS] = true;
+
+               destroyPQExpBuffer(pq);
+       }
+
+       query = createPQExpBuffer();
+
+       appendPQExpBufferStr(query, "EXECUTE getExtStatsStats(");
+       appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout);
+       appendPQExpBufferStr(query, "::pg_catalog.name, ");
+       appendStringLiteralAH(query, statsextinfo->dobj.name, fout);
+       appendPQExpBufferStr(query, "::pg_catalog.name)");
+
+       res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+       destroyPQExpBuffer(query);
+
+       nstats = PQntuples(res);
+
+       if (nstats > 0)
+       {
+               PQExpBuffer out = createPQExpBuffer();
+
+               int                     i_inherited = PQfnumber(res, "inherited");
+               int                     i_ndistinct = PQfnumber(res, "n_distinct");
+               int                     i_dependencies = PQfnumber(res, "dependencies");
+
+               for (int i = 0; i < nstats; i++)
+               {
+                       TableInfo  *tbinfo = statsextinfo->stattable;
+
+                       if (PQgetisnull(res, i, i_inherited))
+                               pg_fatal("inherited cannot be NULL");
+
+                       appendPQExpBufferStr(out,
+                                                                "SELECT * FROM pg_catalog.pg_restore_extended_stats(\n");
+                       appendPQExpBuffer(out, "\t'version', '%d'::integer,\n",
+                                                         fout->remoteVersion);
+
+                       /* Relation information: the table the statistics object is on */
+                       appendPQExpBufferStr(out, "\t'schemaname', ");
+                       appendStringLiteralAH(out, tbinfo->dobj.namespace->dobj.name, fout);
+                       appendPQExpBufferStr(out, ",\n\t'relname', ");
+                       appendStringLiteralAH(out, tbinfo->dobj.name, fout);
+
+                       /*
+                        * Extended statistics information.  "inherited" is always
+                        * emitted; "n_distinct" and "dependencies" are emitted only
+                        * when the query returned a non-NULL value for them.
+                        */
+                       appendPQExpBufferStr(out, ",\n\t'statistics_schemaname', ");
+                       appendStringLiteralAH(out, statsextinfo->dobj.namespace->dobj.name, fout);
+                       appendPQExpBufferStr(out, ",\n\t'statistics_name', ");
+                       appendStringLiteralAH(out, statsextinfo->dobj.name, fout);
+                       appendNamedArgument(out, fout, "inherited", "boolean",
+                                                               PQgetvalue(res, i, i_inherited));
+
+                       if (!PQgetisnull(res, i, i_ndistinct))
+                               appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct",
+                                                                       PQgetvalue(res, i, i_ndistinct));
+
+                       if (!PQgetisnull(res, i, i_dependencies))
+                               appendNamedArgument(out, fout, "dependencies", "pg_dependencies",
+                                                                       PQgetvalue(res, i, i_dependencies));
+
+                       appendPQExpBufferStr(out, "\n);\n");
+               }
+
+               ArchiveEntry(fout, nilCatalogId, createDumpId(),
+                                        ARCHIVE_OPTS(.tag = statsextinfo->dobj.name,
+                                                                 .namespace = statsextinfo->dobj.namespace->dobj.name,
+                                                                 .owner = statsextinfo->rolname,
+                                                                 .description = "EXTENDED STATISTICS DATA",
+                                                                 .section = SECTION_POST_DATA,
+                                                                 .createStmt = out->data,
+                                                                 .deps = &statsextinfo->dobj.dumpId,
+                                                                 .nDeps = 1));
+               destroyPQExpBuffer(out);
+       }
+       PQclear(res);
+}
+
 /*
  * dumpConstraint
  *       write out to fout a user-defined constraint
index 28812d28aa9aff439650105e5a0fb022e51661d5..a8dcc2b5c757ad29de0d3e5a5d20219d473ec869 100644 (file)
@@ -4772,6 +4772,34 @@ my %tests = (
                },
        },
 
+       #
+       # EXTENDED stats will end up in SECTION_POST_DATA.
+       #
+       'extended_statistics_import' => {
+               create_sql => '
+                       CREATE TABLE dump_test.has_ext_stats
+                       AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g);
+                       CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats;
+                       ANALYZE dump_test.has_ext_stats;',
+               regexp => qr/^
+                       \QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm,
+               like => {
+                       %full_runs,
+                       %dump_test_schema_runs,
+                       no_data_no_schema => 1,
+                       no_schema => 1,
+                       section_post_data => 1,
+                       statistics_only => 1,
+                       schema_only_with_statistics => 1,
+               },
+               unlike => {
+                       exclude_dump_test_schema => 1,
+                       no_statistics => 1,
+                       only_dump_measurement => 1,
+                       schema_only => 1,
+               },
+       },
+
        #
        # While attribute stats (aka pg_statistic stats) only appear for tables
        # that have been analyzed, all tables will have relation stats because
index b8e641cc1cba45c667860d8b0e223ce9e0941271..5cc7a0b50ae79aef206b389b17b8ab571cdd3601 100644 (file)
@@ -353,8 +353,8 @@ sub adjust_old_dumpfile
        # Version comments will certainly not match.
        $dump =~ s/^-- Dumped from database version.*\n//mg;
 
-       # Same with version argument to pg_restore_relation_stats() or
-       # pg_restore_attribute_stats().
+       # Same with version argument to pg_restore_relation_stats(),
+       # pg_restore_attribute_stats() or pg_restore_extended_stats().
        $dump =~ s {\n(\s+'version',) '\d+'::integer,$}
                {$1 '000000'::integer,}mg;
 
@@ -703,8 +703,8 @@ sub adjust_new_dumpfile
        # Version comments will certainly not match.
        $dump =~ s/^-- Dumped from database version.*\n//mg;
 
-       # Same with version argument to pg_restore_relation_stats() or
-       # pg_restore_attribute_stats().
+       # Same with version argument to pg_restore_relation_stats(),
+       # pg_restore_attribute_stats() or pg_restore_extended_stats().
        $dump =~ s {\n(\s+'version',) '\d+'::integer,$}
                {$1 '000000'::integer,}mg;