git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
pg_upgrade: Fix handling of pg_largeobject_metadata.
author    Nathan Bossart <nathan@postgresql.org>
Mon, 9 Feb 2026 20:58:02 +0000 (14:58 -0600)
committer Nathan Bossart <nathan@postgresql.org>
Mon, 9 Feb 2026 20:58:02 +0000 (14:58 -0600)
For binary upgrades from v16 or newer, pg_upgrade transfers the
files for pg_largeobject_metadata from the old cluster, as opposed
to using COPY or ordinary SQL commands to reconstruct its contents.
While this approach adds complexity, it can greatly reduce
pg_upgrade's runtime when there are many large objects.

Large objects with comments or security labels are one source of
complexity for this approach.  During pg_upgrade, schema
restoration happens before files are transferred.  Comments and
security labels are transferred in the former step, but the COMMENT
and SECURITY LABEL commands will fail if their corresponding large
objects do not exist.  To deal with this, pg_upgrade first copies
only the rows of pg_largeobject_metadata that are needed to avoid
failures.  Later, pg_upgrade overwrites those rows by replacing
pg_largeobject_metadata's files with the corresponding files from the
old cluster.

Unfortunately, there's a subtle problem here.  Simply put, there's
no guarantee that pg_upgrade will overwrite all of
pg_largeobject_metadata's files on the new cluster.  For example,
the new cluster's version might more aggressively extend relations
or create visibility maps, and pg_upgrade's file transfer code is
not sophisticated enough to remove files that lack counterparts in
the old cluster.  These extra files could cause problems
post-upgrade.

More fortunately, we can simultaneously fix the aforementioned
problem and further optimize binary upgrades for clusters with many
large objects.  If we teach the COMMENT and SECURITY LABEL commands
to allow nonexistent large objects during binary upgrades,
pg_upgrade no longer needs to transfer pg_largeobject_metadata's
contents beforehand.  This approach also allows us to remove the
associated dependency tracking from pg_dump, even for upgrades from
v12-v15 that use COPY to transfer pg_largeobject_metadata's
contents.

In addition to what is described in the previous paragraph, this
commit modifies the query in getLOs() to only retrieve LOs with
comments or security labels for upgrades from v12 or newer.  We
have long assumed that such usage is rare, so this should reduce
pg_upgrade's memory usage and runtime in many cases.  We might also
be able to remove the "upgrades from v12 or newer" restriction on
the recent batch of optimizations by adding special handling for
pg_largeobject_metadata's hidden OID column on older versions
(since this catalog previously used the now-removed WITH OIDS
feature), but that is left as a future exercise.

Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/3yd2ss6n7xywo6pmhd7jjh3bqwgvx35bflzgv3ag4cnzfkik7m%40hiyadppqxx6w

src/backend/commands/comment.c
src/backend/commands/seclabel.c
src/bin/pg_dump/pg_dump.c

index caacb17e5d7a6e4eb83aff32833ec1212dc8751a..771aba2a69f2b6fa652b0f600c4fd7fa5be2a155 100644 (file)
@@ -41,6 +41,7 @@ CommentObject(CommentStmt *stmt)
 {
        Relation        relation;
        ObjectAddress address = InvalidObjectAddress;
+       bool            missing_ok;
 
        /*
         * When loading a dump, we may see a COMMENT ON DATABASE for the old name
@@ -63,6 +64,14 @@ CommentObject(CommentStmt *stmt)
                }
        }
 
+       /*
+        * During binary upgrade, allow nonexistent large objects so that we don't
+        * have to create them during schema restoration.  pg_upgrade will
+        * transfer the contents of pg_largeobject_metadata via COPY or by
+        * copying/linking its files from the old cluster later on.
+        */
+       missing_ok = IsBinaryUpgrade && stmt->objtype == OBJECT_LARGEOBJECT;
+
        /*
         * Translate the parser representation that identifies this object into an
         * ObjectAddress.  get_object_address() will throw an error if the object
@@ -70,7 +79,8 @@ CommentObject(CommentStmt *stmt)
         * against concurrent DROP operations.
         */
        address = get_object_address(stmt->objtype, stmt->object,
-                                                                &relation, ShareUpdateExclusiveLock, false);
+                                                                &relation, ShareUpdateExclusiveLock,
+                                                                missing_ok);
 
        /* Require ownership of the target object. */
        check_object_ownership(GetUserId(), stmt->objtype, address,
index 4160f5b6855a8347ab27123ef6299bfb74c0bf65..5b80396723c06f767861d2f8540422f83faa4f67 100644 (file)
@@ -118,6 +118,7 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
        ObjectAddress address;
        Relation        relation;
        ListCell   *lc;
+       bool            missing_ok;
 
        /*
         * Find the named label provider, or if none specified, check whether
@@ -159,6 +160,14 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                 errmsg("security labels are not supported for this type of object")));
 
+       /*
+        * During binary upgrade, allow nonexistent large objects so that we don't
+        * have to create them during schema restoration.  pg_upgrade will
+        * transfer the contents of pg_largeobject_metadata via COPY or by
+        * copying/linking its files from the old cluster later on.
+        */
+       missing_ok = IsBinaryUpgrade && stmt->objtype == OBJECT_LARGEOBJECT;
+
        /*
         * Translate the parser representation which identifies this object into
         * an ObjectAddress. get_object_address() will throw an error if the
@@ -166,7 +175,8 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
         * guard against concurrent modifications.
         */
        address = get_object_address(stmt->objtype, stmt->object,
-                                                                &relation, ShareUpdateExclusiveLock, false);
+                                                                &relation, ShareUpdateExclusiveLock,
+                                                                missing_ok);
 
        /* Require ownership of the target object. */
        check_object_ownership(GetUserId(), stmt->objtype, address,
index 2bebefd0ba218b7f54889c56f3085cd91337b355..2c3754d020fbabc2885533eb06870b08591ebd53 100644 (file)
@@ -214,12 +214,6 @@ static int nbinaryUpgradeClassOids = 0;
 static SequenceItem *sequences = NULL;
 static int     nsequences = 0;
 
-/*
- * For binary upgrade, the dump ID of pg_largeobject_metadata is saved for use
- * as a dependency for pg_shdepend and any large object comments/seclabels.
- */
-static DumpId lo_metadata_dumpId;
-
 /* Maximum number of relations to fetch in a fetchAttributeStats() call. */
 #define MAX_ATTR_STATS_RELS 64
 
@@ -1121,27 +1115,20 @@ main(int argc, char **argv)
                getTableData(&dopt, tblinfo, numTables, RELKIND_SEQUENCE);
 
        /*
-        * For binary upgrade mode, dump pg_largeobject_metadata and the
-        * associated pg_shdepend rows. This is faster to restore than the
-        * equivalent set of large object commands.  We can only do this for
-        * upgrades from v12 and newer; in older versions, pg_largeobject_metadata
-        * was created WITH OIDS, so the OID column is hidden and won't be dumped.
+        * For binary upgrade mode, dump the pg_shdepend rows for large objects
+        * and maybe even pg_largeobject_metadata (see comment below for details).
+        * This is faster to restore than the equivalent set of large object
+        * commands.  We can only do this for upgrades from v12 and newer; in
+        * older versions, pg_largeobject_metadata was created WITH OIDS, so the
+        * OID column is hidden and won't be dumped.
         */
        if (dopt.binary_upgrade && fout->remoteVersion >= 120000)
        {
-               TableInfo  *lo_metadata = findTableByOid(LargeObjectMetadataRelationId);
-               TableInfo  *shdepend = findTableByOid(SharedDependRelationId);
+               TableInfo  *shdepend;
 
-               makeTableDataInfo(&dopt, lo_metadata);
+               shdepend = findTableByOid(SharedDependRelationId);
                makeTableDataInfo(&dopt, shdepend);
 
-               /*
-                * Save pg_largeobject_metadata's dump ID for use as a dependency for
-                * pg_shdepend and any large object comments/seclabels.
-                */
-               lo_metadata_dumpId = lo_metadata->dataObj->dobj.dumpId;
-               addObjectDependency(&shdepend->dataObj->dobj, lo_metadata_dumpId);
-
                /*
                 * Only dump large object shdepend rows for this database.
                 */
@@ -1150,21 +1137,19 @@ main(int argc, char **argv)
                        "            WHERE datname = current_database())";
 
                /*
-                * If upgrading from v16 or newer, only dump large objects with
-                * comments/seclabels.  For these upgrades, pg_upgrade can copy/link
-                * pg_largeobject_metadata's files (which is usually faster) but we
-                * still need to dump LOs with comments/seclabels here so that the
-                * subsequent COMMENT and SECURITY LABEL commands work.  pg_upgrade
-                * can't copy/link the files from older versions because aclitem
-                * (needed by pg_largeobject_metadata.lomacl) changed its storage
-                * format in v16.
+                * For binary upgrades from v16 and newer versions, we can copy
+                * pg_largeobject_metadata's files from the old cluster, so we don't
+                * need to dump its contents.  pg_upgrade can't copy/link the files
+                * from older versions because aclitem (needed by
+                * pg_largeobject_metadata.lomacl) changed its storage format in v16.
                 */
-               if (fout->remoteVersion >= 160000)
-                       lo_metadata->dataObj->filtercond = "WHERE oid IN "
-                               "(SELECT objoid FROM pg_description "
-                               "WHERE classoid = " CppAsString2(LargeObjectRelationId) " "
-                               "UNION SELECT objoid FROM pg_seclabel "
-                               "WHERE classoid = " CppAsString2(LargeObjectRelationId) ")";
+               if (fout->remoteVersion < 160000)
+               {
+                       TableInfo  *lo_metadata;
+
+                       lo_metadata = findTableByOid(LargeObjectMetadataRelationId);
+                       makeTableDataInfo(&dopt, lo_metadata);
+               }
        }
 
        /*
@@ -3979,7 +3964,25 @@ getLOs(Archive *fout)
        appendPQExpBufferStr(loQry,
                                                 "SELECT oid, lomowner, lomacl, "
                                                 "acldefault('L', lomowner) AS acldefault "
-                                                "FROM pg_largeobject_metadata "
+                                                "FROM pg_largeobject_metadata ");
+
+       /*
+        * For binary upgrades from v12 or newer, we transfer
+        * pg_largeobject_metadata via COPY or by copying/linking its files from
+        * the old cluster.  On such upgrades, we only need to consider large
+        * objects that have comments or security labels, since we still restore
+        * those objects via COMMENT/SECURITY LABEL commands.
+        */
+       if (dopt->binary_upgrade &&
+               fout->remoteVersion >= 120000)
+               appendPQExpBufferStr(loQry,
+                                                        "WHERE oid IN "
+                                                        "(SELECT objoid FROM pg_description "
+                                                        "WHERE classoid = " CppAsString2(LargeObjectRelationId) " "
+                                                        "UNION SELECT objoid FROM pg_seclabel "
+                                                        "WHERE classoid = " CppAsString2(LargeObjectRelationId) ") ");
+
+       appendPQExpBufferStr(loQry,
                                                 "ORDER BY lomowner, lomacl::pg_catalog.text, oid");
 
        res = ExecuteSqlQuery(fout, loQry->data, PGRES_TUPLES_OK);
@@ -4062,36 +4065,20 @@ getLOs(Archive *fout)
                /*
                 * In binary-upgrade mode for LOs, we do *not* dump out the LO data,
                 * as it will be copied by pg_upgrade, which simply copies the
-                * pg_largeobject table. We *do* however dump out anything but the
-                * data, as pg_upgrade copies just pg_largeobject, but not
-                * pg_largeobject_metadata, after the dump is restored.  In versions
-                * before v12, this is done via proper large object commands.  In
-                * newer versions, we dump the content of pg_largeobject_metadata and
-                * any associated pg_shdepend rows, which is faster to restore.  (On
-                * <v12, pg_largeobject_metadata was created WITH OIDS, so the OID
-                * column is hidden and won't be dumped.)
+                * pg_largeobject table.
+                *
+                * The story for LO metadata is more complicated.  For upgrades from
+                * versions older than v12, we use ordinary SQL commands to restore
+                * both the content of pg_largeobject_metadata and any associated
+                * pg_shdepend rows.  For upgrades from newer versions, we transfer
+                * this information via COPY or by copying/linking the files from the
+                * old cluster.  For such upgrades, we do not need to dump the data,
+                * ACLs, or definitions of large objects.
                 */
                if (dopt->binary_upgrade)
                {
                        if (fout->remoteVersion >= 120000)
-                       {
-                               /*
-                                * We should've saved pg_largeobject_metadata's dump ID before
-                                * this point.
-                                */
-                               Assert(lo_metadata_dumpId);
-
                                loinfo->dobj.dump &= ~(DUMP_COMPONENT_DATA | DUMP_COMPONENT_ACL | DUMP_COMPONENT_DEFINITION);
-
-                               /*
-                                * Mark the large object as dependent on
-                                * pg_largeobject_metadata so that any large object
-                                * comments/seclables are dumped after it.
-                                */
-                               loinfo->dobj.dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
-                               loinfo->dobj.dependencies[0] = lo_metadata_dumpId;
-                               loinfo->dobj.nDeps = loinfo->dobj.allocDeps = 1;
-                       }
                        else
                                loinfo->dobj.dump &= ~DUMP_COMPONENT_DATA;
                }