]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
vacuumdb: Teach vacuum_one_database() to reuse query results.
authorNathan Bossart <nathan@postgresql.org>
Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)
committerNathan Bossart <nathan@postgresql.org>
Tue, 18 Mar 2025 21:32:55 +0000 (16:32 -0500)
Presently, each call to vacuum_one_database() queries the catalogs
to retrieve the list of tables to process.  A follow-up commit will
add a "missing stats only" feature to --analyze-in-stages, which
requires saving the catalog query results (since tables without
statistics will have them after the first stage).  This commit adds
a new parameter to vacuum_one_database() that specifies either a
previously-retrieved list or a place to return the catalog query
results.  Note that nothing uses this new parameter yet.

Author: Corey Huinker <corey.huinker@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan

src/bin/scripts/vacuumdb.c

index 982bf070be6f2cc543f654a66d8bacda4c258a82..e28f82a0ebaf449cbf5f4014a3b6c40f0af3224f 100644 (file)
@@ -62,10 +62,16 @@ typedef enum
 
 static VacObjFilter objfilter = OBJFILTER_NONE;
 
+static SimpleStringList *retrieve_objects(PGconn *conn,
+                                                                                 vacuumingOptions *vacopts,
+                                                                                 SimpleStringList *objects,
+                                                                                 bool echo);
+
 static void vacuum_one_database(ConnParams *cparams,
                                                                vacuumingOptions *vacopts,
                                                                int stage,
                                                                SimpleStringList *objects,
+                                                               SimpleStringList **found_objs,
                                                                int concurrentCons,
                                                                const char *progname, bool echo, bool quiet);
 
@@ -405,7 +411,7 @@ main(int argc, char *argv[])
                        {
                                vacuum_one_database(&cparams, &vacopts,
                                                                        stage,
-                                                                       &objects,
+                                                                       &objects, NULL,
                                                                        concurrentCons,
                                                                        progname, echo, quiet);
                        }
@@ -413,7 +419,7 @@ main(int argc, char *argv[])
                else
                        vacuum_one_database(&cparams, &vacopts,
                                                                ANALYZE_NO_STAGE,
-                                                               &objects,
+                                                               &objects, NULL,
                                                                concurrentCons,
                                                                progname, echo, quiet);
        }
@@ -461,8 +467,36 @@ escape_quotes(const char *src)
 /*
  * vacuum_one_database
  *
- * Process tables in the given database.  If the 'objects' list is empty,
- * process all tables in the database.
+ * Process tables in the given database.
+ *
+ * There are two ways to specify the list of objects to process:
+ *
+ * 1) The "found_objs" parameter is a double pointer to a fully qualified list
+ *    of objects to process, as returned by a previous call to
+ *    vacuum_one_database().
+ *
+ *     a) If both "found_objs" (the double pointer) and "*found_objs" (the
+ *        once-dereferenced double pointer) are not NULL, this list takes
+ *        priority, and anything specified in "objects" is ignored.
+ *
+ *     b) If "found_objs" (the double pointer) is not NULL but "*found_objs"
+ *        (the once-dereferenced double pointer) _is_ NULL, the "objects"
+ *        parameter takes priority, and the results of the catalog query
+ *        described in (2) are stored in "found_objs".
+ *
+ *     c) If "found_objs" (the double pointer) is NULL, the "objects"
+ *        parameter again takes priority, and the results of the catalog query
+ *        are not saved.
+ *
+ * 2) The "objects" parameter is a user-specified list of objects to process.
+ *    When (1b) or (1c) applies, this function performs a catalog query to
+ *    retrieve a fully qualified list of objects to process, as described
+ *    below.
+ *
+ *     a) If "objects" is not NULL, the catalog query gathers only the objects
+ *        listed in "objects".
+ *
+ *     b) If "objects" is NULL, all tables in the database are gathered.
  *
  * Note that this function is only concerned with running exactly one stage
  * when in analyze-in-stages mode; caller must iterate on us if necessary.
@@ -475,22 +509,18 @@ vacuum_one_database(ConnParams *cparams,
                                        vacuumingOptions *vacopts,
                                        int stage,
                                        SimpleStringList *objects,
+                                       SimpleStringList **found_objs,
                                        int concurrentCons,
                                        const char *progname, bool echo, bool quiet)
 {
        PQExpBufferData sql;
-       PQExpBufferData buf;
-       PQExpBufferData catalog_query;
-       PGresult   *res;
        PGconn     *conn;
        SimpleStringListCell *cell;
        ParallelSlotArray *sa;
-       SimpleStringList dbtables = {NULL, NULL};
-       int                     i;
-       int                     ntups;
+       int                     ntups = 0;
        bool            failed = false;
-       bool            objects_listed = false;
        const char *initcmd;
+       SimpleStringList *ret = NULL;
        const char *stage_commands[] = {
                "SET default_statistics_target=1; SET vacuum_cost_delay=0;",
                "SET default_statistics_target=10; RESET vacuum_cost_delay;",
@@ -599,19 +629,155 @@ vacuum_one_database(ConnParams *cparams,
        }
 
        /*
-        * Prepare the list of tables to process by querying the catalogs.
-        *
-        * Since we execute the constructed query with the default search_path
-        * (which could be unsafe), everything in this query MUST be fully
-        * qualified.
-        *
-        * First, build a WITH clause for the catalog query if any tables were
-        * specified, with a set of values made of relation names and their
-        * optional set of columns.  This is used to match any provided column
-        * lists with the generated qualified identifiers and to filter for the
-        * tables provided via --table.  If a listed table does not exist, the
-        * catalog query will fail.
+        * If the caller provided the results of a previous catalog query, just
+        * use that.  Otherwise, run the catalog query ourselves and set the
+        * return variable if provided.
+        */
+       if (found_objs && *found_objs)
+               ret = *found_objs;
+       else
+       {
+               ret = retrieve_objects(conn, vacopts, objects, echo);
+               if (found_objs)
+                       *found_objs = ret;
+       }
+
+       /*
+        * Count the number of objects in the catalog query result.  If there are
+        * none, we are done.
+        */
+       for (cell = ret ? ret->head : NULL; cell; cell = cell->next)
+               ntups++;
+
+       if (ntups == 0)
+       {
+               PQfinish(conn);
+               return;
+       }
+
+       /*
+        * Ensure concurrentCons is sane.  If there are more connections than
+        * vacuumable relations, we don't need to use them all.
         */
+       if (concurrentCons > ntups)
+               concurrentCons = ntups;
+       if (concurrentCons <= 0)
+               concurrentCons = 1;
+
+       /*
+        * All slots need to be prepared to run the appropriate analyze stage, if
+        * caller requested that mode.  We have to prepare the initial connection
+        * ourselves before setting up the slots.
+        */
+       if (stage == ANALYZE_NO_STAGE)
+               initcmd = NULL;
+       else
+       {
+               initcmd = stage_commands[stage];
+               executeCommand(conn, initcmd, echo);
+       }
+
+       /*
+        * Setup the database connections. We reuse the connection we already have
+        * for the first slot.  If not in parallel mode, the first slot in the
+        * array contains the connection.
+        */
+       sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
+       ParallelSlotsAdoptConn(sa, conn);
+
+       initPQExpBuffer(&sql);
+
+       cell = ret->head;
+       do
+       {
+               const char *tabname = cell->val;
+               ParallelSlot *free_slot;
+
+               if (CancelRequested)
+               {
+                       failed = true;
+                       goto finish;
+               }
+
+               free_slot = ParallelSlotsGetIdle(sa, NULL);
+               if (!free_slot)
+               {
+                       failed = true;
+                       goto finish;
+               }
+
+               prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
+                                                          vacopts, tabname);
+
+               /*
+                * Execute the vacuum.  All errors are handled in processQueryResult
+                * through ParallelSlotsGetIdle.
+                */
+               ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
+               run_vacuum_command(free_slot->connection, sql.data,
+                                                  echo, tabname);
+
+               cell = cell->next;
+       } while (cell != NULL);
+
+       if (!ParallelSlotsWaitCompletion(sa))
+       {
+               failed = true;
+               goto finish;
+       }
+
+       /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
+       if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
+       {
+               const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
+               ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
+
+               if (!free_slot)
+               {
+                       failed = true;
+                       goto finish;
+               }
+
+               ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
+               run_vacuum_command(free_slot->connection, cmd, echo, NULL);
+
+               if (!ParallelSlotsWaitCompletion(sa))
+                       failed = true;
+       }
+
+finish:
+       ParallelSlotsTerminate(sa);
+       pg_free(sa);
+
+       termPQExpBuffer(&sql);
+
+       if (failed)
+               exit(1);
+}
+
+/*
+ * Prepare the list of tables to process by querying the catalogs.
+ *
+ * Since we execute the constructed query with the default search_path (which
+ * could be unsafe), everything in this query MUST be fully qualified.
+ *
+ * First, build a WITH clause for the catalog query if any tables were
+ * specified, with a set of values made of relation names and their optional
+ * set of columns.  This is used to match any provided column lists with the
+ * generated qualified identifiers and to filter for the tables provided via
+ * --table.  If a listed table does not exist, the catalog query will fail.
+ */
+static SimpleStringList *
+retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
+                                SimpleStringList *objects, bool echo)
+{
+       PQExpBufferData buf;
+       PQExpBufferData catalog_query;
+       PGresult   *res;
+       SimpleStringListCell *cell;
+       SimpleStringList *found_objs = palloc0(sizeof(SimpleStringList));
+       bool            objects_listed = false;
+
        initPQExpBuffer(&catalog_query);
        for (cell = objects ? objects->head : NULL; cell; cell = cell->next)
        {
@@ -765,23 +931,12 @@ vacuum_one_database(ConnParams *cparams,
        termPQExpBuffer(&catalog_query);
        PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo));
 
-       /*
-        * If no rows are returned, there are no matching tables, so we are done.
-        */
-       ntups = PQntuples(res);
-       if (ntups == 0)
-       {
-               PQclear(res);
-               PQfinish(conn);
-               return;
-       }
-
        /*
         * Build qualified identifiers for each table, including the column list
         * if given.
         */
        initPQExpBuffer(&buf);
-       for (i = 0; i < ntups; i++)
+       for (int i = 0; i < PQntuples(res); i++)
        {
                appendPQExpBufferStr(&buf,
                                                         fmtQualifiedIdEnc(PQgetvalue(res, i, 1),
@@ -791,110 +946,13 @@ vacuum_one_database(ConnParams *cparams,
                if (objects_listed && !PQgetisnull(res, i, 2))
                        appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2));
 
-               simple_string_list_append(&dbtables, buf.data);
+               simple_string_list_append(found_objs, buf.data);
                resetPQExpBuffer(&buf);
        }
        termPQExpBuffer(&buf);
        PQclear(res);
 
-       /*
-        * Ensure concurrentCons is sane.  If there are more connections than
-        * vacuumable relations, we don't need to use them all.
-        */
-       if (concurrentCons > ntups)
-               concurrentCons = ntups;
-       if (concurrentCons <= 0)
-               concurrentCons = 1;
-
-       /*
-        * All slots need to be prepared to run the appropriate analyze stage, if
-        * caller requested that mode.  We have to prepare the initial connection
-        * ourselves before setting up the slots.
-        */
-       if (stage == ANALYZE_NO_STAGE)
-               initcmd = NULL;
-       else
-       {
-               initcmd = stage_commands[stage];
-               executeCommand(conn, initcmd, echo);
-       }
-
-       /*
-        * Setup the database connections. We reuse the connection we already have
-        * for the first slot.  If not in parallel mode, the first slot in the
-        * array contains the connection.
-        */
-       sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
-       ParallelSlotsAdoptConn(sa, conn);
-
-       initPQExpBuffer(&sql);
-
-       cell = dbtables.head;
-       do
-       {
-               const char *tabname = cell->val;
-               ParallelSlot *free_slot;
-
-               if (CancelRequested)
-               {
-                       failed = true;
-                       goto finish;
-               }
-
-               free_slot = ParallelSlotsGetIdle(sa, NULL);
-               if (!free_slot)
-               {
-                       failed = true;
-                       goto finish;
-               }
-
-               prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
-                                                          vacopts, tabname);
-
-               /*
-                * Execute the vacuum.  All errors are handled in processQueryResult
-                * through ParallelSlotsGetIdle.
-                */
-               ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
-               run_vacuum_command(free_slot->connection, sql.data,
-                                                  echo, tabname);
-
-               cell = cell->next;
-       } while (cell != NULL);
-
-       if (!ParallelSlotsWaitCompletion(sa))
-       {
-               failed = true;
-               goto finish;
-       }
-
-       /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
-       if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
-       {
-               const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
-               ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
-
-               if (!free_slot)
-               {
-                       failed = true;
-                       goto finish;
-               }
-
-               ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
-               run_vacuum_command(free_slot->connection, cmd, echo, NULL);
-
-               if (!ParallelSlotsWaitCompletion(sa))
-                       failed = true;
-       }
-
-finish:
-       ParallelSlotsTerminate(sa);
-       pg_free(sa);
-
-       termPQExpBuffer(&sql);
-
-       if (failed)
-               exit(1);
+       return found_objs;
 }
 
 /*
@@ -941,7 +999,7 @@ vacuum_all_databases(ConnParams *cparams,
 
                                vacuum_one_database(cparams, vacopts,
                                                                        stage,
-                                                                       objects,
+                                                                       objects, NULL,
                                                                        concurrentCons,
                                                                        progname, echo, quiet);
                        }
@@ -955,7 +1013,7 @@ vacuum_all_databases(ConnParams *cparams,
 
                        vacuum_one_database(cparams, vacopts,
                                                                ANALYZE_NO_STAGE,
-                                                               objects,
+                                                               objects, NULL,
                                                                concurrentCons,
                                                                progname, echo, quiet);
                }