]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix subtransaction cleanup after an outer-subtransaction portal fails.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 4 Sep 2015 17:36:50 +0000 (13:36 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 4 Sep 2015 17:36:50 +0000 (13:36 -0400)
Formerly, we treated only portals created in the current subtransaction as
having failed during subtransaction abort.  However, if the error occurred
while running a portal created in an outer subtransaction (ie, a cursor
declared before the last savepoint), that has to be considered broken too.

To allow reliable detection of which ones those are, add a bookkeeping
field to struct Portal that tracks the innermost subtransaction in which
each portal has actually been executed.  (Without this, we'd end up
failing portals containing functions that had called the subtransaction,
thereby breaking plpgsql exception blocks completely.)

In addition, when we fail an outer-subtransaction Portal, transfer its
resources into the subtransaction's resource owner, so that they're
released early in cleanup of the subxact.  This fixes a problem reported by
Jim Nasby in which a function executed in an outer-subtransaction cursor
could cause an Assert failure or crash by referencing a relation created
within the inner subtransaction.

The proximate cause of the Assert failure is that AtEOSubXact_RelationCache
assumed it could blow away a relcache entry without first checking that the
entry had zero refcount.  That was a bad idea on its own terms, so add such
a check there, and to the similar coding in AtEOXact_RelationCache.  This
provides an independent safety measure in case there are still ways to
provoke the situation despite the Portal-level changes.

This has been broken since subtransactions were invented, so back-patch
to all supported branches.

Tom Lane and Michael Paquier

src/backend/access/transam/xact.c
src/backend/commands/portalcmds.c
src/backend/tcop/pquery.c
src/backend/utils/cache/relcache.c
src/backend/utils/mmgr/portalmem.c
src/include/utils/portal.h
src/test/regress/expected/transactions.out
src/test/regress/sql/transactions.sql

index 9c848c612cbc843eb3cba94c9f91baa0474539f0..a1200ca8268983c88cc1e23b62858e3e62f94cf9 100644 (file)
@@ -4146,6 +4146,7 @@ AbortSubTransaction(void)
                AfterTriggerEndSubXact(false);
                AtSubAbort_Portals(s->subTransactionId,
                                                   s->parent->subTransactionId,
+                                                  s->curTransactionOwner,
                                                   s->parent->curTransactionOwner);
                AtEOSubXact_LargeObject(false, s->subTransactionId,
                                                                s->parent->subTransactionId);
index 1621465838cc5fcb98d690cdc334e190035b06f5..ed2605b5891292678c4715a3f32428143d022fc8 100644 (file)
@@ -335,11 +335,7 @@ PersistHoldablePortal(Portal portal)
        /*
         * Check for improper portal use, and mark portal active.
         */
-       if (portal->status != PORTAL_READY)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                                errmsg("portal \"%s\" cannot be run", portal->name)));
-       portal->status = PORTAL_ACTIVE;
+       MarkPortalActive(portal);
 
        /*
         * Set up global portal context pointers.
index 87ea6ce9e52be17afa84749c16186043c1a7d39d..14f7d8efc46c7eb36994d1bb043de126e5a3fe5d 100644 (file)
@@ -733,11 +733,7 @@ PortalRun(Portal portal, long count, bool isTopLevel,
        /*
         * Check for improper portal use, and mark portal active.
         */
-       if (portal->status != PORTAL_READY)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                                errmsg("portal \"%s\" cannot be run", portal->name)));
-       portal->status = PORTAL_ACTIVE;
+       MarkPortalActive(portal);
 
        /*
         * Set up global portal context pointers.
@@ -1397,11 +1393,7 @@ PortalRunFetch(Portal portal,
        /*
         * Check for improper portal use, and mark portal active.
         */
-       if (portal->status != PORTAL_READY)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                                errmsg("portal \"%s\" cannot be run", portal->name)));
-       portal->status = PORTAL_ACTIVE;
+       MarkPortalActive(portal);
 
        /*
         * Set up global portal context pointers.
index 162588ee3d173b1ef24e1c58bd05b26a1f377b35..b9a969281bf7c09af9fa287688737ec718b2f8fc 100644 (file)
@@ -1831,7 +1831,9 @@ RelationClearRelation(Relation relation, bool rebuild)
 {
        /*
         * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
-        * course it would be a bad idea to blow away one with nonzero refcnt.
+        * course it would be an equally bad idea to blow away one with nonzero
+        * refcnt, since that would leave someone somewhere with a dangling
+        * pointer.  All callers are expected to have verified that this holds.
         */
        Assert(rebuild ?
                   !RelationHasReferenceCountZero(relation) :
@@ -2321,11 +2323,25 @@ AtEOXact_RelationCache(bool isCommit)
                {
                        if (isCommit)
                                relation->rd_createSubid = InvalidSubTransactionId;
-                       else
+                       else if (RelationHasReferenceCountZero(relation))
                        {
                                RelationClearRelation(relation, false);
                                continue;
                        }
+                       else
+                       {
+                               /*
+                                * Hmm, somewhere there's a (leaked?) reference to the
+                                * relation.  We daren't remove the entry for fear of
+                                * dereferencing a dangling pointer later.  Bleat, and mark it
+                                * as not belonging to the current transaction.  Hopefully
+                                * it'll get cleaned up eventually.  This must be just a
+                                * WARNING to avoid error-during-error-recovery loops.
+                                */
+                               relation->rd_createSubid = InvalidSubTransactionId;
+                               elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
+                                        RelationGetRelationName(relation));
+                       }
                }
 
                /*
@@ -2386,11 +2402,25 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
                {
                        if (isCommit)
                                relation->rd_createSubid = parentSubid;
-                       else
+                       else if (RelationHasReferenceCountZero(relation))
                        {
                                RelationClearRelation(relation, false);
                                continue;
                        }
+                       else
+                       {
+                               /*
+                                * Hmm, somewhere there's a (leaked?) reference to the
+                                * relation.  We daren't remove the entry for fear of
+                                * dereferencing a dangling pointer later.  Bleat, and
+                                * transfer it to the parent subtransaction so we can try
+                                * again later.  This must be just a WARNING to avoid
+                                * error-during-error-recovery loops.
+                                */
+                               relation->rd_createSubid = parentSubid;
+                               elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
+                                        RelationGetRelationName(relation));
+                       }
                }
 
                /*
index 3dda7cff94c1e78ee6f39ce56fc375330e1245d5..6eb56adc0d06c5e1a1e58a75050fee5b484a8f9b 100644 (file)
@@ -231,6 +231,7 @@ CreatePortal(const char *name, bool allowDup, bool dupSilent)
        portal->status = PORTAL_NEW;
        portal->cleanup = PortalCleanup;
        portal->createSubid = GetCurrentSubTransactionId();
+       portal->activeSubid = portal->createSubid;
        portal->strategy = PORTAL_MULTI_QUERY;
        portal->cursorOptions = CURSOR_OPT_NO_SCROLL;
        portal->atStart = true;
@@ -401,6 +402,25 @@ UnpinPortal(Portal portal)
        portal->portalPinned = false;
 }
 
+/*
+ * MarkPortalActive
+ *             Transition a portal from READY to ACTIVE state.
+ *
+ * NOTE: never set portal->status = PORTAL_ACTIVE directly; call this instead.
+ */
+void
+MarkPortalActive(Portal portal)
+{
+       /* For safety, this is a runtime test not just an Assert */
+       if (portal->status != PORTAL_READY)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("portal \"%s\" cannot be run", portal->name)));
+       /* Perform the state transition */
+       portal->status = PORTAL_ACTIVE;
+       portal->activeSubid = GetCurrentSubTransactionId();
+}
+
 /*
  * MarkPortalDone
  *             Transition a portal from ACTIVE to DONE state.
@@ -689,6 +709,7 @@ PreCommit_Portals(bool isPrepare)
                         * not belonging to this transaction.
                         */
                        portal->createSubid = InvalidSubTransactionId;
+                       portal->activeSubid = InvalidSubTransactionId;
 
                        /* Report we changed state */
                        result = true;
@@ -835,8 +856,8 @@ AtCleanup_Portals(void)
 /*
  * Pre-subcommit processing for portals.
  *
- * Reassign the portals created in the current subtransaction to the parent
- * subtransaction.
+ * Reassign portals created or used in the current subtransaction to the
+ * parent subtransaction.
  */
 void
 AtSubCommit_Portals(SubTransactionId mySubid,
@@ -858,14 +879,16 @@ AtSubCommit_Portals(SubTransactionId mySubid,
                        if (portal->resowner)
                                ResourceOwnerNewParent(portal->resowner, parentXactOwner);
                }
+               if (portal->activeSubid == mySubid)
+                       portal->activeSubid = parentSubid;
        }
 }
 
 /*
  * Subtransaction abort handling for portals.
  *
- * Deactivate portals created during the failed subtransaction.
- * Note that per AtSubCommit_Portals, this will catch portals created
+ * Deactivate portals created or used during the failed subtransaction.
+ * Note that per AtSubCommit_Portals, this will catch portals created/used
  * in descendants of the subtransaction too.
  *
  * We don't destroy any portals here; that's done in AtSubCleanup_Portals.
@@ -873,6 +896,7 @@ AtSubCommit_Portals(SubTransactionId mySubid,
 void
 AtSubAbort_Portals(SubTransactionId mySubid,
                                   SubTransactionId parentSubid,
+                                  ResourceOwner myXactOwner,
                                   ResourceOwner parentXactOwner)
 {
        HASH_SEQ_STATUS status;
@@ -884,16 +908,58 @@ AtSubAbort_Portals(SubTransactionId mySubid,
        {
                Portal          portal = hentry->portal;
 
+               /* Was it created in this subtransaction? */
                if (portal->createSubid != mySubid)
+               {
+                       /* No, but maybe it was used in this subtransaction? */
+                       if (portal->activeSubid == mySubid)
+                       {
+                               /* Maintain activeSubid until the portal is removed */
+                               portal->activeSubid = parentSubid;
+
+                               /*
+                                * Upper-level portals that failed while running in this
+                                * subtransaction must be forced into FAILED state, for the
+                                * same reasons discussed below.
+                                *
+                                * We assume we can get away without forcing upper-level READY
+                                * portals to fail, even if they were run and then suspended.
+                                * In theory a suspended upper-level portal could have
+                                * acquired some references to objects that are about to be
+                                * destroyed, but there should be sufficient defenses against
+                                * such cases: the portal's original query cannot contain such
+                                * references, and any references within, say, cached plans of
+                                * PL/pgSQL functions are not from active queries and should
+                                * be protected by revalidation logic.
+                                */
+                               if (portal->status == PORTAL_ACTIVE)
+                                       MarkPortalFailed(portal);
+
+                               /*
+                                * Also, if we failed it during the current subtransaction
+                                * (either just above, or earlier), reattach its resource
+                                * owner to the current subtransaction's resource owner, so
+                                * that any resources it still holds will be released while
+                                * cleaning up this subtransaction.  This prevents some corner
+                                * cases wherein we might get Asserts or worse while cleaning
+                                * up objects created during the current subtransaction
+                                * (because they're still referenced within this portal).
+                                */
+                               if (portal->status == PORTAL_FAILED && portal->resowner)
+                               {
+                                       ResourceOwnerNewParent(portal->resowner, myXactOwner);
+                                       portal->resowner = NULL;
+                               }
+                       }
+                       /* Done if it wasn't created in this subtransaction */
                        continue;
+               }
 
                /*
                 * Force any live portals of my own subtransaction into FAILED state.
                 * We have to do this because they might refer to objects created or
-                * changed in the failed subtransaction, leading to crashes if
-                * execution is resumed, or even if we just try to run ExecutorEnd.
-                * (Note we do NOT do this to upper-level portals, since they cannot
-                * have such references and hence may be able to continue.)
+                * changed in the failed subtransaction, leading to crashes within
+                * ExecutorEnd when portalcmds.c tries to close down the portal.
                 */
                if (portal->status == PORTAL_READY ||
                        portal->status == PORTAL_ACTIVE)
index e7ee6f9574d0967abaf666bb5e63c809023aeaa4..3fe0e74ee37438a08ab21367453a39e5908b39c7 100644 (file)
@@ -118,12 +118,15 @@ typedef struct PortalData
        MemoryContext heap;                     /* subsidiary memory for portal */
        ResourceOwner resowner;         /* resources owned by portal */
        void            (*cleanup) (Portal portal);             /* cleanup hook */
-       SubTransactionId createSubid;           /* the ID of the creating subxact */
 
        /*
-        * if createSubid is InvalidSubTransactionId, the portal is held over from
-        * a previous transaction
+        * State data for remembering which subtransaction(s) the portal was
+        * created or used in.  If the portal is held over from a previous
+        * transaction, both subxids are InvalidSubTransactionId.  Otherwise,
+        * createSubid is the creating subxact and activeSubid is the last subxact
+        * in which we ran the portal.
         */
+       SubTransactionId createSubid;           /* the creating subxact */
 
        /* The query or queries the portal will execute */
        const char *sourceText;         /* text of query (as of 8.4, never NULL) */
@@ -174,6 +177,13 @@ typedef struct PortalData
        /* Presentation data, primarily used by the pg_cursors system view */
        TimestampTz creation_time;      /* time at which this portal was defined */
        bool            visible;                /* include this portal in pg_cursors? */
+
+       /*
+        * This field belongs with createSubid, but in pre-9.5 branches, add it
+        * at the end to avoid creating an ABI break for extensions that examine
+        * Portal structs.
+        */
+       SubTransactionId activeSubid;           /* the last subxact with activity */
 }      PortalData;
 
 /*
@@ -200,12 +210,14 @@ extern void AtSubCommit_Portals(SubTransactionId mySubid,
                                        ResourceOwner parentXactOwner);
 extern void AtSubAbort_Portals(SubTransactionId mySubid,
                                   SubTransactionId parentSubid,
+                                  ResourceOwner myXactOwner,
                                   ResourceOwner parentXactOwner);
 extern void AtSubCleanup_Portals(SubTransactionId mySubid);
 extern Portal CreatePortal(const char *name, bool allowDup, bool dupSilent);
 extern Portal CreateNewPortal(void);
 extern void PinPortal(Portal portal);
 extern void UnpinPortal(Portal portal);
+extern void MarkPortalActive(Portal portal);
 extern void MarkPortalDone(Portal portal);
 extern void MarkPortalFailed(Portal portal);
 extern void PortalDrop(Portal portal, bool isTopCommit);
index e9d3908b1ab53d69fc833774a82375902b548610..e5bab1be0df3ccb6b3496f1397b9c6f5ef976752 100644 (file)
@@ -601,6 +601,52 @@ fetch from foo;
 (1 row)
 
 abort;
+-- Test for proper cleanup after a failure in a cursor portal
+-- that was created in an outer subtransaction
+CREATE FUNCTION invert(x float8) RETURNS float8 LANGUAGE plpgsql AS
+$$ begin return 1/x; end $$;
+CREATE FUNCTION create_temp_tab() RETURNS text
+LANGUAGE plpgsql AS $$
+BEGIN
+  CREATE TEMP TABLE new_table (f1 float8);
+  -- case of interest is that we fail while holding an open
+  -- relcache reference to new_table
+  INSERT INTO new_table SELECT invert(0.0);
+  RETURN 'foo';
+END $$;
+BEGIN;
+DECLARE ok CURSOR FOR SELECT * FROM int8_tbl;
+DECLARE ctt CURSOR FOR SELECT create_temp_tab();
+FETCH ok;
+ q1  | q2  
+-----+-----
+ 123 | 456
+(1 row)
+
+SAVEPOINT s1;
+FETCH ok;  -- should work
+ q1  |        q2        
+-----+------------------
+ 123 | 4567890123456789
+(1 row)
+
+FETCH ctt; -- error occurs here
+ERROR:  division by zero
+CONTEXT:  PL/pgSQL function "invert" line 1 at RETURN
+SQL statement "INSERT INTO new_table SELECT invert(0.0)"
+PL/pgSQL function "create_temp_tab" line 6 at SQL statement
+ROLLBACK TO s1;
+FETCH ok;  -- should work
+        q1        | q2  
+------------------+-----
+ 4567890123456789 | 123
+(1 row)
+
+FETCH ctt; -- must be rejected
+ERROR:  portal "ctt" cannot be run
+COMMIT;
+DROP FUNCTION create_temp_tab();
+DROP FUNCTION invert(x float8);
 -- Test for successful cleanup of an aborted transaction at session exit.
 -- THIS MUST BE THE LAST TEST IN THIS FILE.
 begin;
index faf6a9bf9380444c9ab38bf642fc0a7f6e68e539..f500fe45c050c7d2d9bdc3a4c56df7eaf744c0b1 100644 (file)
@@ -368,6 +368,38 @@ fetch from foo;
 
 abort;
 
+
+-- Test for proper cleanup after a failure in a cursor portal
+-- that was created in an outer subtransaction
+CREATE FUNCTION invert(x float8) RETURNS float8 LANGUAGE plpgsql AS
+$$ begin return 1/x; end $$;
+
+CREATE FUNCTION create_temp_tab() RETURNS text
+LANGUAGE plpgsql AS $$
+BEGIN
+  CREATE TEMP TABLE new_table (f1 float8);
+  -- case of interest is that we fail while holding an open
+  -- relcache reference to new_table
+  INSERT INTO new_table SELECT invert(0.0);
+  RETURN 'foo';
+END $$;
+
+BEGIN;
+DECLARE ok CURSOR FOR SELECT * FROM int8_tbl;
+DECLARE ctt CURSOR FOR SELECT create_temp_tab();
+FETCH ok;
+SAVEPOINT s1;
+FETCH ok;  -- should work
+FETCH ctt; -- error occurs here
+ROLLBACK TO s1;
+FETCH ok;  -- should work
+FETCH ctt; -- must be rejected
+COMMIT;
+
+DROP FUNCTION create_temp_tab();
+DROP FUNCTION invert(x float8);
+
+
 -- Test for successful cleanup of an aborted transaction at session exit.
 -- THIS MUST BE THE LAST TEST IN THIS FILE.