]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
vacuumdb: Add option for analyzing only relations missing stats.
authorNathan Bossart <nathan@postgresql.org>
Tue, 4 Feb 2025 21:07:54 +0000 (15:07 -0600)
committerJohn Naylor <john.naylor@postgresql.org>
Fri, 7 Mar 2025 03:17:35 +0000 (10:17 +0700)
This commit adds a new --missing-only option that can be used in
conjunction with --analyze-only and --analyze-in-stages.  When this
option is specified, vacuumdb will generate ANALYZE commands for a
relation if it is missing any statistics it should ordinarily have.
For example, if a table has statistics for one column but not
another, we will analyze the whole table.  A similar principle
applies to extended statistics, expression indexes, and table
inheritance.

Co-authored-by: Corey Huinker <corey.huinker@gmail.com>
Reviewed-by: TODO
Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan

doc/src/sgml/ref/vacuumdb.sgml
src/bin/scripts/t/102_vacuumdb_stages.pl
src/bin/scripts/vacuumdb.c
src/test/perl/PostgreSQL/Test/Cluster.pm

index 66fccb30a2d26013abb9750be8a56185c10ebe1d..5295a61f083379771a7a19f3c4bc1bb670fd999f 100644 (file)
@@ -277,6 +277,22 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--missing-only</option></term>
+      <listitem>
+       <para>
+        Only analyze relations that are missing statistics for a column, index
+        expression, or extended statistics object.  This option prevents
+        <application>vacuumdb</application> from deleting existing statistics
+        so that the query optimizer's choices do not become transiently worse.
+       </para>
+       <para>
+        This option can only be used in conjunction with
+        <option>--analyze-only</option> and <option>--analyze-in-stages</option>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-n <replaceable class="parameter">schema</replaceable></option></term>
       <term><option>--schema=<replaceable class="parameter">schema</replaceable></option></term>
index 984c8d06de60b63de96dc8a669dd5fd53df5d858..b216fb0c2c68d8ebf39a5bc9f71da3e39415d4de 100644 (file)
@@ -21,6 +21,66 @@ $node->issues_sql_like(
                    .*statement:\ ANALYZE/sx,
        'analyze three times');
 
+$node->safe_psql('postgres',
+       'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;');
+$node->issues_sql_like(
+       [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+       qr/statement:\ ANALYZE/sx,
+       '--missing-only with missing stats');
+$node->issues_sql_unlike(
+       [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+       qr/statement:\ ANALYZE/sx,
+       '--missing-only with no missing stats');
+
+$node->safe_psql('postgres',
+       'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));');
+$node->issues_sql_like(
+       [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+       qr/statement:\ ANALYZE/sx,
+       '--missing-only with missing index expression stats');
+$node->issues_sql_unlike(
+       [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+       qr/statement:\ ANALYZE/sx,
+       '--missing-only with no missing index expression stats');
+
+$node->safe_psql('postgres',
+       'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;');
+$node->issues_sql_like(
+       [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+       qr/statement:\ ANALYZE/sx,
+       '--missing-only with missing extended stats');
+$node->issues_sql_unlike(
+       [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+       qr/statement:\ ANALYZE/sx,
+       '--missing-only with no missing extended stats');
+
+$node->safe_psql('postgres',
+       "CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n"
+       . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n"
+       . "ANALYZE regression_vacuumdb_child;\n");
+$node->issues_sql_like(
+    [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+    qr/statement:\ ANALYZE/sx,
+    '--missing-only with missing inherited stats');
+$node->issues_sql_unlike(
+    [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+    qr/statement:\ ANALYZE/sx,
+    '--missing-only with no missing inherited stats');
+
+$node->safe_psql('postgres',
+       "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n"
+       . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n"
+       . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n"
+       . "ANALYZE regression_vacuumdb_part1;\n");
+$node->issues_sql_like(
+    [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_parted', 'postgres' ],
+    qr/statement:\ ANALYZE/sx,
+    '--missing-only with missing partition stats');
+$node->issues_sql_unlike(
+    [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_parted', 'postgres' ],
+    qr/statement:\ ANALYZE/sx,
+    '--missing-only with no missing partition stats');
+
 $node->issues_sql_like(
        [ 'vacuumdb', '--analyze-in-stages', '--all' ],
        qr/statement:\ SET\ default_statistics_target=1;\ SET\ vacuum_cost_delay=0;
index 982bf070be6f2cc543f654a66d8bacda4c258a82..101862ceb6c66fcec005a6a6f7da43e726d91882 100644 (file)
@@ -47,6 +47,7 @@ typedef struct vacuumingOptions
        bool            process_toast;
        bool            skip_database_stats;
        char       *buffer_usage_limit;
+       bool            missing_only;
 } vacuumingOptions;
 
 /* object filter options */
@@ -128,6 +129,7 @@ main(int argc, char *argv[])
                {"no-process-toast", no_argument, NULL, 11},
                {"no-process-main", no_argument, NULL, 12},
                {"buffer-usage-limit", required_argument, NULL, 13},
+               {"missing-only", no_argument, NULL, 14},
                {NULL, 0, NULL, 0}
        };
 
@@ -275,6 +277,9 @@ main(int argc, char *argv[])
                        case 13:
                                vacopts.buffer_usage_limit = escape_quotes(optarg);
                                break;
+                       case 14:
+                               vacopts.missing_only = true;
+                               break;
                        default:
                                /* getopt_long already emitted a complaint */
                                pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -360,6 +365,11 @@ main(int argc, char *argv[])
                pg_fatal("cannot use the \"%s\" option with the \"%s\" option",
                                 "buffer-usage-limit", "full");
 
+       /* Prohibit --missing-only without --analyze-only or --analyze-in-stages */
+       if (vacopts.missing_only && !vacopts.analyze_only)
+               pg_fatal("cannot use the \"%s\" option without \"%s\" or \"%s\"",
+                                "missing-only", "analyze-only", "analyze-in-stages");
+
        /* fill cparams except for dbname, which is set below */
        cparams.pghost = host;
        cparams.pgport = port;
@@ -584,6 +594,13 @@ vacuum_one_database(ConnParams *cparams,
                                 "--buffer-usage-limit", "16");
        }
 
+       if (vacopts->missing_only && PQserverVersion(conn) < 150000)
+       {
+               PQfinish(conn);
+               pg_fatal("cannot use the \"%s\" option on server versions older than PostgreSQL %s",
+                                "--missing-only", "15");
+       }
+
        /* skip_database_stats is used automatically if server supports it */
        vacopts->skip_database_stats = (PQserverVersion(conn) >= 160000);
 
@@ -672,6 +689,7 @@ vacuum_one_database(ConnParams *cparams,
                                                 " FROM pg_catalog.pg_class c\n"
                                                 " JOIN pg_catalog.pg_namespace ns"
                                                 " ON c.relnamespace OPERATOR(pg_catalog.=) ns.oid\n"
+                                                " CROSS JOIN LATERAL (SELECT c.relkind IN ('p', 'I')) as p (inherited)\n"
                                                 " LEFT JOIN pg_catalog.pg_class t"
                                                 " ON c.reltoastrelid OPERATOR(pg_catalog.=) t.oid\n");
 
@@ -755,6 +773,79 @@ vacuum_one_database(ConnParams *cparams,
                                                  vacopts->min_mxid_age);
        }
 
+       if (vacopts->missing_only)
+       {
+               appendPQExpBufferStr(&catalog_query, " AND (\n");
+
+               /* regular stats */
+               appendPQExpBufferStr(&catalog_query,
+                                                        " EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
+                                                        " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n"
+                                                        " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                                                        " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
+                                                        " AND NOT a.attisdropped\n"
+                                                        " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                                                        " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
+                                                        " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
+                                                        " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
+                                                        " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n");
+
+               /* extended stats */
+               appendPQExpBufferStr(&catalog_query,
+                                                        " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n"
+                                                        " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n"
+                                                        " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                                                        " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                                                        " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n"
+                                                        " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n"
+                                                        " AND d.stxdinherit OPERATOR(pg_catalog.=) p.inherited))\n");
+
+               /* expression indexes */
+               appendPQExpBufferStr(&catalog_query,
+                                                        " OR EXISTS (SELECT NULL FROM pg_catalog.pg_index i\n"
+                                                        " CROSS JOIN LATERAL pg_catalog.unnest(i.indkey) WITH ORDINALITY u (attnum, ord)\n"
+                                                        " WHERE i.indrelid OPERATOR(pg_catalog.=) c.oid\n"
+                                                        " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                                                        " AND i.indexprs IS NOT NULL\n"
+                                                        " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
+                                                        " WHERE s.starelid OPERATOR(pg_catalog.=) i.indexrelid\n"
+                                                        " AND s.staattnum OPERATOR(pg_catalog.=) u.ord\n"
+                                                        " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n");
+
+               /* table inheritance and regular stats */
+               appendPQExpBufferStr(&catalog_query,
+                                                        " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
+                                                        " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n"
+                                                        " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                                                        " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
+                                                        " AND NOT a.attisdropped\n"
+                                                        " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                                                        " AND c.relhassubclass\n"
+                                                        " AND NOT p.inherited\n"
+                                                        " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"
+                                                        " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n"
+                                                        " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
+                                                        " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
+                                                        " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
+                                                        " AND s.stainherit))\n");
+
+               /* table inheritance and extended stats */
+               appendPQExpBufferStr(&catalog_query,
+                                                        " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n"
+                                                        " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n"
+                                                        " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                                                        " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                                                        " AND c.relhassubclass\n"
+                                                        " AND NOT p.inherited\n"
+                                                        " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"
+                                                        " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n"
+                                                        " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n"
+                                                        " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n"
+                                                        " AND d.stxdinherit))\n");
+
+               appendPQExpBufferStr(&catalog_query, " )\n");
+       }
+
        /*
         * Execute the catalog query.  We use the default search_path for this
         * query for consistency with table lookups done elsewhere by the user.
@@ -1181,6 +1272,7 @@ help(const char *progname)
        printf(_("  -j, --jobs=NUM                  use this many concurrent connections to vacuum\n"));
        printf(_("      --min-mxid-age=MXID_AGE     minimum multixact ID age of tables to vacuum\n"));
        printf(_("      --min-xid-age=XID_AGE       minimum transaction ID age of tables to vacuum\n"));
+       printf(_("      --missing-only              only analyze relations with missing statistics\n"));
        printf(_("      --no-index-cleanup          don't remove index entries that point to dead tuples\n"));
        printf(_("      --no-process-main           skip the main relation\n"));
        printf(_("      --no-process-toast          skip the TOAST table associated with the table to vacuum\n"));
index b105cba05a6bcc53dfeacac91d69538cdb51e0cc..ff8e04d3a039182be36ba137815c5c9a929f9e95 100644 (file)
@@ -2820,6 +2820,33 @@ sub issues_sql_like
 
 =pod
 
+=item $node->issues_sql_unlike(cmd, unexpected_sql, test_name)
+
+Run a command on the node, then verify that $unexpected_sql does not appear in
+the server log file.
+
+=cut
+
+sub issues_sql_unlike
+{
+       local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+       my ($self, $cmd, $unexpected_sql, $test_name) = @_;
+
+       local %ENV = $self->_get_env();
+
+       my $log_location = -s $self->logfile;
+
+       my $result = PostgreSQL::Test::Utils::run_log($cmd);
+       ok($result, "@$cmd exit code 0");
+       my $log =
+         PostgreSQL::Test::Utils::slurp_file($self->logfile, $log_location);
+       unlike($log, $unexpected_sql, "$test_name: SQL not found in server log");
+       return;
+}
+
+=pod
+
 =item $node->log_content()
 
 Returns the contents of log of the node