Revert "Skip WAL for new relfilenodes, under wal_level=minimal."

author Noah Misch <noah@leadboat.com>
Sun, 22 Mar 2020 16:24:09 +0000 (09:24 -0700)
committer Noah Misch <noah@leadboat.com>
Sun, 22 Mar 2020 16:24:15 +0000 (09:24 -0700)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index 6e9f6941311fdaa96f46fe789abc24087e1d4aae..943cbe656efda125c04aeec23195008047114516 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1977,19 +1977,16 @@ include_dir 'conf.d'
          levels.  This parameter can only be set at server start.
         </para>
         <para>
-        In <literal>minimal</literal> level, no information is logged for
-        permanent relations for the remainder of a transaction that creates or
-        rewrites them.  This can make operations much faster (see
-        <xref linkend="populate-pitr">).  Operations that initiate this
-        optimization include:
+        In <literal>minimal</> level, WAL-logging of some bulk
+        operations can be safely skipped, which can make those
+        operations much faster (see <xref linkend="populate-pitr">).
+        Operations in which this optimization can be applied include:
          <simplelist>
-         <member><command>ALTER ... SET TABLESPACE</command></member>
-         <member><command>CLUSTER</command></member>
-         <member><command>CREATE TABLE</command></member>
-         <member><command>REFRESH MATERIALIZED VIEW</command>
-         (without <option>CONCURRENTLY</option>)</member>
-         <member><command>REINDEX</command></member>
-         <member><command>TRUNCATE</command></member>
+         <member><command>CREATE TABLE AS</></member>
+         <member><command>CREATE INDEX</></member>
+         <member><command>CLUSTER</></member>
+         <member><command>COPY</> into tables that were created or truncated in the same
+         transaction</member>
          </simplelist>
          But minimal WAL does not contain enough information to reconstruct the
          data from a base backup and the WAL logs, so <literal>archive</> or
@@ -2359,26 +2356,6 @@ include_dir 'conf.d'
        </listitem>
       </varlistentry>
  
-     <varlistentry id="guc-wal-skip-threshold" xreflabel="wal_skip_threshold">
-      <term><varname>wal_skip_threshold</varname> (<type>integer</type>)
-      <indexterm>
-       <primary><varname>wal_skip_threshold</varname> configuration parameter</primary>
-      </indexterm>
-      </term>
-      <listitem>
-       <para>
-        When <varname>wal_level</varname> is <literal>minimal</literal> and a
-        transaction commits after creating or rewriting a permanent relation,
-        this setting determines how to persist the new data.  If the data is
-        smaller than this setting, write it to the WAL log; otherwise, use an
-        fsync of affected files.  Depending on the properties of your storage,
-        raising or lowering this value might help if such commits are slowing
-        concurrent transactions.  The default is two megabytes
-        (<literal>2MB</literal>).
-       </para>
-      </listitem>
-     </varlistentry>
-
       <varlistentry id="guc-commit-delay" xreflabel="commit_delay">
        <term><varname>commit_delay</varname> (<type>integer</type>)
        <indexterm>
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml

index 981080e92d9a5ad3dabe946f6ea5c6a68da64071..9a1c21a54254b047bb0d4f4f59f437e7c537e5d9 100644 (file)
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1394,13 +1394,42 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
     </para>
  
     <para>
-    Aside from avoiding the time for the archiver or WAL sender to process the
-    WAL data, doing this will actually make certain commands faster, because
-    they do not to write WAL at all if <varname>wal_level</varname>
-    is <literal>minimal</literal> and the current subtransaction (or top-level
-    transaction) created or truncated the table or index they change.  (They
-    can guarantee crash safety more cheaply by doing
-    an <function>fsync</function> at the end than by writing WAL.)
+    Aside from avoiding the time for the archiver or WAL sender to
+    process the WAL data,
+    doing this will actually make certain commands faster, because they
+    are designed not to write WAL at all if <varname>wal_level</varname>
+    is <literal>minimal</>.  (They can guarantee crash safety more cheaply
+    by doing an <function>fsync</> at the end than by writing WAL.)
+    This applies to the following commands:
+    <itemizedlist>
+     <listitem>
+      <para>
+       <command>CREATE TABLE AS SELECT</command>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>CREATE INDEX</command> (and variants such as
+       <command>ALTER TABLE ADD PRIMARY KEY</command>)
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>ALTER TABLE SET TABLESPACE</command>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>CLUSTER</command>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <command>COPY FROM</command>, when the target table has been
+       created or truncated earlier in the same transaction
+      </para>
+     </listitem>
+    </itemizedlist>
     </para>
    </sect2>
  
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c

index 0444e3a107244b9f35fd2d605a4aa2726063a1f5..ff888e2e01d299f53e9bd59dd8803d15ed647b93 100644 (file)
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -191,7 +191,7 @@ gistbuild(PG_FUNCTION_ARGS)
                 PageSetLSN(page, recptr);
         }
         else
-               PageSetLSN(page, gistGetFakeLSN(index));
+               PageSetLSN(page, gistGetFakeLSN(heap));
  
         UnlockReleaseBuffer(buffer);
  
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c

index 47cb7fde51886d0f2764d6b6c287332645ac2916..7d596a3e2e68c2b0be7b7f4e89ea5940579803b1 100644 (file)
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -840,44 +840,23 @@ gistoptions(PG_FUNCTION_ARGS)
  }
  
  /*
- * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
- * splits anyway. This function provides a fake sequence of LSNs for that
- * purpose.
+ * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
+ * to detect concurrent page splits anyway. This function provides a fake
+ * sequence of LSNs for that purpose.
   */
  XLogRecPtr
  gistGetFakeLSN(Relation rel)
  {
+       static XLogRecPtr counter = 1;
+
         if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
         {
                 /*
                  * Temporary relations are only accessible in our session, so a simple
                  * backend-local counter will do.
                  */
-               static XLogRecPtr counter = 1;
-
                 return counter++;
         }
-       else if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
-       {
-               /*
-                * WAL-logging on this relation will start after commit, so its LSNs
-                * must be distinct numbers smaller than the LSN at the next commit.
-                * Emit a dummy WAL record if insert-LSN hasn't advanced after the
-                * last call.
-                */
-               static XLogRecPtr lastlsn = InvalidXLogRecPtr;
-               XLogRecPtr      currlsn = GetXLogInsertRecPtr();
-
-               /* Shouldn't be called for WAL-logging relations */
-               Assert(!RelationNeedsWAL(rel));
-
-               /* No need for an actual record if we already have a distinct LSN */
-               if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
-                       currlsn = gistXLogAssignLSN();
-
-               lastlsn = currlsn;
-               return currlsn;
-       }
         else
         {
                 /*
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c

index e002ad3d8299375a327323f4e5d4a7c06e746637..fbdbb3c51f2e7fc7e40c918bffe063537836233e 100644 (file)
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -301,9 +301,6 @@ gist_redo(XLogReaderState *record)
                 case XLOG_GIST_CREATE_INDEX:
                         gistRedoCreateIndex(record);
                         break;
-               case XLOG_GIST_ASSIGN_LSN:
-                       /* nop. See gistGetFakeLSN(). */
-                       break;
                 default:
                         elog(PANIC, "gist_redo: unknown op code %u", info);
         }
@@ -380,23 +377,6 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
         return recptr;
  }
  
-/*
- * Write an empty XLOG record to assign a distinct LSN.
- */
-XLogRecPtr
-gistXLogAssignLSN(void)
-{
-       int                     dummy = 0;
-
-       /*
-        * Records other than SWITCH_WAL must have content. We use an integer 0 to
-        * follow the restriction.
-        */
-       XLogBeginInsert();
-       XLogRegisterData((char *) &dummy, sizeof(dummy));
-       return XLogInsert(RM_GIST_ID, XLOG_GIST_ASSIGN_LSN);
-}
-
  /*
   * Write XLOG record describing a page update. The update can include any
   * number of deletions and/or insertions of tuples on a single index page.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c

index f605c1abaeeceab40beb80c82c64066ef2fc2cb0..9554704456c92170acf282e523888c4851dc1dab 100644 (file)
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -27,6 +27,7 @@
   *             heap_multi_insert - insert multiple tuples into a relation
   *             heap_delete             - delete a tuple from a relation
   *             heap_update             - replace a tuple in a relation with another tuple
+ *             heap_sync               - sync heap, for when no WAL has been written
   *
   * NOTES
   *       This file contains the heap_ routines which implement
@@ -2102,6 +2103,12 @@ FreeBulkInsertState(BulkInsertState bistate)
   * The new tuple is stamped with current transaction ID and the specified
   * command ID.
   *
+ * If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
+ * logged in WAL, even for a non-temp relation.  Safe usage of this behavior
+ * requires that we arrange that all new tuples go into new pages not
+ * containing any tuples from other transactions, and that the relation gets
+ * fsync'd before commit.  (See also heap_sync() comments)
+ *
   * The HEAP_INSERT_SKIP_FSM option is passed directly to
   * RelationGetBufferForTuple, which see for more info.
   *
@@ -2210,7 +2217,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
         MarkBufferDirty(buffer);
  
         /* XLOG stuff */
-       if (RelationNeedsWAL(relation))
+       if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation))
         {
                 xl_heap_insert xlrec;
                 xl_heap_header xlhdr;
@@ -2418,7 +2425,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
         /* currently not needed (thus unsupported) for heap_multi_insert() */
         AssertArg(!(options & HEAP_INSERT_NO_LOGICAL));
  
-       needwal = RelationNeedsWAL(relation);
+       needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation);
         saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
                                                                                                    HEAP_DEFAULT_FILLFACTOR);
  
@@ -8746,13 +8753,18 @@ heap2_redo(XLogReaderState *record)
  }
  
  /*
- *     heap_sync               - for binary compatibility
- *
- * A newer PostgreSQL version removes this function.  It exists here just in
- * case an extension calls it.  See "Skipping WAL for New RelFileNode" in
- * src/backend/access/transam/README for the system that superseded it,
- * allowing removal of most calls.  Cases like copy_relation_data() should
- * call smgrimmedsync() directly.
+ *     heap_sync               - sync a heap, for use when no WAL has been written
+ *
+ * This forces the heap contents (including TOAST heap if any) down to disk.
+ * If we skipped using WAL, and WAL is otherwise needed, we must force the
+ * relation down to disk before it's safe to commit the transaction.  This
+ * requires writing out any dirty buffers and then doing a forced fsync.
+ *
+ * Indexes are not touched.  (Currently, index operations associated with
+ * the commands that use this are WAL-logged and so do not need fsync.
+ * That behavior might change someday, but in any case it's likely that
+ * any fsync decisions required would be per-index and hence not appropriate
+ * to be done here.)
   */
  void
  heap_sync(Relation rel)
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c

index e7a24732cc4df6cb77c37c8b5294707515a4287c..7f1b798f72ba4e86fe82c40818c9847fdc81b16c 100644 (file)
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -143,6 +143,7 @@ typedef struct RewriteStateData
         Page            rs_buffer;              /* page currently being built */
         BlockNumber rs_blockno;         /* block where page will go */
         bool            rs_buffer_valid;        /* T if any tuples in buffer */
+       bool            rs_use_wal;             /* must we WAL-log inserts? */
         bool            rs_logical_rewrite;             /* do we need to do logical rewriting */
         TransactionId rs_oldest_xmin;           /* oldest xmin used by caller to
                                                                                  * determine tuple visibility */
@@ -236,13 +237,15 @@ static void logical_end_heap_rewrite(RewriteState state);
   * oldest_xmin xid used by the caller to determine which tuples are dead
   * freeze_xid  xid before which tuples will be frozen
   * min_multi   multixact before which multis will be removed
+ * use_wal             should the inserts to the new heap be WAL-logged?
   *
   * Returns an opaque RewriteState, allocated in current memory context,
   * to be used in subsequent calls to the other functions.
   */
  RewriteState
  begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin,
-                                  TransactionId freeze_xid, MultiXactId cutoff_multi)
+                                  TransactionId freeze_xid, MultiXactId cutoff_multi,
+                                  bool use_wal)
  {
         RewriteState state;
         MemoryContext rw_cxt;
@@ -269,6 +272,7 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
         /* new_heap needn't be empty, just locked */
         state->rs_blockno = RelationGetNumberOfBlocks(new_heap);
         state->rs_buffer_valid = false;
+       state->rs_use_wal = use_wal;
         state->rs_oldest_xmin = oldest_xmin;
         state->rs_freeze_xid = freeze_xid;
         state->rs_cutoff_multi = cutoff_multi;
@@ -327,7 +331,7 @@ end_heap_rewrite(RewriteState state)
         /* Write the last page, if any */
         if (state->rs_buffer_valid)
         {
-               if (RelationNeedsWAL(state->rs_new_rel))
+               if (state->rs_use_wal)
                         log_newpage(&state->rs_new_rel->rd_node,
                                                 MAIN_FORKNUM,
                                                 state->rs_blockno,
@@ -342,14 +346,18 @@ end_heap_rewrite(RewriteState state)
         }
  
         /*
-        * When we WAL-logged rel pages, we must nonetheless fsync them.  The
+        * If the rel is WAL-logged, must fsync before commit.  We use heap_sync
+        * to ensure that the toast table gets fsync'd too.
+        *
+        * It's obvious that we must do this when not WAL-logging. It's less
+        * obvious that we have to do it even if we did WAL-log the pages. The
          * reason is the same as in tablecmds.c's copy_relation_data(): we're
          * writing data that's not in shared buffers, and so a CHECKPOINT
          * occurring during the rewriteheap operation won't have fsync'd data we
          * wrote before the checkpoint.
          */
         if (RelationNeedsWAL(state->rs_new_rel))
-               smgrimmedsync(state->rs_new_rel->rd_smgr, MAIN_FORKNUM);
+               heap_sync(state->rs_new_rel);
  
         logical_end_heap_rewrite(state);
  
@@ -646,6 +654,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
         {
                 int options = HEAP_INSERT_SKIP_FSM;
  
+               if (!state->rs_use_wal)
+                       options |= HEAP_INSERT_SKIP_WAL;
+
                 /*
                  * While rewriting the heap for VACUUM FULL / CLUSTER, make sure data
                  * for the TOAST table are not logically decoded.  The main heap is
@@ -684,7 +695,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
                         /* Doesn't fit, so write out the existing page */
  
                         /* XLOG stuff */
-                       if (RelationNeedsWAL(state->rs_new_rel))
+                       if (state->rs_use_wal)
                                 log_newpage(&state->rs_new_rel->rd_node,
                                                         MAIN_FORKNUM,
                                                         state->rs_blockno,
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c

index 7b88e977196afeac902e3a26ef84ea783658ae92..f95f67ad4b5ceb7f9af4fb0e12ebd115b0a6e336 100644 (file)
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -40,6 +40,18 @@
   * them.  They will need to be re-read into shared buffers on first use after
   * the build finishes.
   *
+ * Since the index will never be used unless it is completely built,
+ * from a crash-recovery point of view there is no need to WAL-log the
+ * steps of the build.  After completing the index build, we can just sync
+ * the whole file to disk using smgrimmedsync() before exiting this module.
+ * This can be seen to be sufficient for crash recovery by considering that
+ * it's effectively equivalent to what would happen if a CHECKPOINT occurred
+ * just after the index build.  However, it is clearly not sufficient if the
+ * DBA is using the WAL log for PITR or replication purposes, since another
+ * machine would not be able to reconstruct the index from WAL.  Therefore,
+ * we log the completed index pages to WAL if and only if WAL archiving is
+ * active.
+ *
   * This code isn't concerned about the FSM at all. The caller is responsible
   * for initializing that.
   *
@@ -204,7 +216,12 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
  
         wstate.heap = btspool->heap;
         wstate.index = btspool->index;
-       wstate.btws_use_wal = RelationNeedsWAL(wstate.index);
+
+       /*
+        * We need to log index creation in WAL iff WAL archiving/streaming is
+        * enabled and the index itself is a WAL-logged relation.
+        */
+       wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index);
  
         /* reserve the metapage */
         wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
@@ -794,15 +811,21 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
         _bt_uppershutdown(wstate, state);
  
         /*
-        * When we WAL-logged index pages, we must nonetheless fsync index files.
-        * Since we're building outside shared buffers, a CHECKPOINT occurring
-        * during the build has no way to flush the previously written data to
-        * disk (indeed it won't know the index even exists).  A crash later on
-        * would replay WAL from the checkpoint, therefore it wouldn't replay our
-        * earlier WAL entries. If we do not fsync those pages here, they might
-        * still not be on disk when the crash occurs.
+        * If the index is WAL-logged, we must fsync it down to disk before it's
+        * safe to commit the transaction.  (For a non-WAL-logged index we don't
+        * care since the index will be uninteresting after a crash anyway.)
+        *
+        * It's obvious that we must do this when not WAL-logging the build. It's
+        * less obvious that we have to do it even if we did WAL-log the index
+        * pages.  The reason is that since we're building outside shared buffers,
+        * a CHECKPOINT occurring during the build has no way to flush the
+        * previously written data to disk (indeed it won't know the index even
+        * exists).  A crash later on would replay WAL from the checkpoint,
+        * therefore it wouldn't replay our earlier WAL entries. If we do not
+        * fsync those pages here, they might still not be on disk when the crash
+        * occurs.
          */
-       if (wstate->btws_use_wal)
+       if (RelationNeedsWAL(wstate->index))
         {
                 RelationOpenSmgr(wstate->index);
                 smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c

index d53cdc6984fa6340b2941d2918d979cb1634977b..b199c6fa20795406b3cb5f3563b193a02dfa05d1 100644 (file)
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -46,9 +46,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
                         break;
                 case XLOG_GIST_CREATE_INDEX:
                         break;
-               case XLOG_GIST_ASSIGN_LSN:
-                       /* No details to write out */
-                       break;
         }
  }
  
@@ -68,9 +65,6 @@ gist_identify(uint8 info)
                 case XLOG_GIST_CREATE_INDEX:
                         id = "CREATE_INDEX";
                         break;
-               case XLOG_GIST_ASSIGN_LSN:
-                       id = "ASSIGN_LSN";
-                       break;
         }
  
         return id;
diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README

index 27322713a0c765f0195de89c16b326812ec88906..81b27a119a0ef86c12bce4250fe02b23ca069038 100644 (file)
--- a/src/backend/access/transam/README
+++ b/src/backend/access/transam/README
@@ -714,38 +714,6 @@ then restart recovery.  This is part of the reason for not writing a WAL
  entry until we've successfully done the original action.
  
  
-Skipping WAL for New RelFileNode
---------------------------------
-
-Under wal_level=minimal, if a change modifies a relfilenode that ROLLBACK
-would unlink, in-tree access methods write no WAL for that change.  Code that
-writes WAL without calling RelationNeedsWAL() must check for this case.  This
-skipping is mandatory.  If a WAL-writing change preceded a WAL-skipping change
-for the same block, REDO could overwrite the WAL-skipping change.  If a
-WAL-writing change followed a WAL-skipping change for the same block, a
-related problem would arise.  When a WAL record contains no full-page image,
-REDO expects the page to match its contents from just before record insertion.
-A WAL-skipping change may not reach disk at all, violating REDO's expectation
-under full_page_writes=off.  For any access method, CommitTransaction() writes
-and fsyncs affected blocks before recording the commit.
-
-Prefer to do the same in future access methods.  However, two other approaches
-can work.  First, an access method can irreversibly transition a given fork
-from WAL-skipping to WAL-writing by calling FlushRelationBuffers() and
-smgrimmedsync().  Second, an access method can opt to write WAL
-unconditionally for permanent relations.  Under these approaches, the access
-method callbacks must not call functions that react to RelationNeedsWAL().
-
-This applies only to WAL records whose replay would modify bytes stored in the
-new relfilenode.  It does not apply to other records about the relfilenode,
-such as XLOG_SMGR_CREATE.  Because it operates at the level of individual
-relfilenodes, RelationNeedsWAL() can differ for tightly-coupled relations.
-Consider "CREATE TABLE t (); BEGIN; ALTER TABLE t ADD c text; ..." in which
-ALTER TABLE adds a TOAST relation.  The TOAST relation will skip WAL, while
-the table owning it will not.  ALTER TABLE SET TABLESPACE will cause a table
-to skip WAL, but that won't affect its indexes.
-
-
  Asynchronous Commit
  -------------------
  
@@ -845,12 +813,13 @@ Changes to a temp table are not WAL-logged, hence could reach disk in
  advance of T1's commit, but we don't care since temp table contents don't
  survive crashes anyway.
  
-Database writes that skip WAL for new relfilenodes are also safe.  In these
-cases it's entirely possible for the data to reach disk before T1's commit,
-because T1 will fsync it down to disk without any sort of interlock.  However,
-all these paths are designed to write data that no other transaction can see
-until after T1 commits.  The situation is thus not different from ordinary
-WAL-logged updates.
+Database writes made via any of the paths we have introduced to avoid WAL
+overhead for bulk updates are also safe.  In these cases it's entirely
+possible for the data to reach disk before T1's commit, because T1 will
+fsync it down to disk without any sort of interlock, as soon as it finishes
+the bulk update.  However, all these paths are designed to write data that
+no other transaction can see until after T1 commits.  The situation is thus
+not different from ordinary WAL-logged updates.
  
  Transaction Emulation during Recovery
  -------------------------------------
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c

index eeec2b669a34bfc9a6c35788d11d26de770c28c2..03cadb018f45269d4b0519b75ad758c6af8287cc 100644 (file)
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2018,13 +2018,6 @@ CommitTransaction(void)
          */
         PreCommit_on_commit_actions();
  
-       /*
-        * Synchronize files that are created and not WAL-logged during this
-        * transaction. This must happen before AtEOXact_RelationMap(), so that we
-        * don't see committed-but-broken files after a crash.
-        */
-       smgrDoPendingSyncs(true);
-
         /* close large objects before lower-level cleanup */
         AtEOXact_LargeObject(true);
  
@@ -2253,13 +2246,6 @@ PrepareTransaction(void)
          */
         PreCommit_on_commit_actions();
  
-       /*
-        * Synchronize files that are created and not WAL-logged during this
-        * transaction. This must happen before EndPrepare(), so that we don't see
-        * committed-but-broken files after a crash and COMMIT PREPARED.
-        */
-       smgrDoPendingSyncs(true);
-
         /* close large objects before lower-level cleanup */
         AtEOXact_LargeObject(true);
  
@@ -2556,7 +2542,6 @@ AbortTransaction(void)
          */
         AfterTriggerEndXact(false); /* 'false' means it's abort */
         AtAbort_Portals();
-       smgrDoPendingSyncs(false);
         AtEOXact_LargeObject(false);
         AtAbort_Notify();
         AtEOXact_RelationMap(false);
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c

index 45b1cbd8dcc12b55aeb15eb5e26d3506b5aaab64..c0386d9688946a84afcc1dda4f824404496fd145 100644 (file)
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -538,8 +538,6 @@ typedef FakeRelCacheEntryData *FakeRelCacheEntry;
   * fields related to physical storage, like rd_rel, are initialized, so the
   * fake entry is only usable in low-level operations like ReadBuffer().
   *
- * This is also used for syncing WAL-skipped files.
- *
   * Caller must free the returned entry with FreeFakeRelcacheEntry().
   */
  Relation
@@ -548,20 +546,18 @@ CreateFakeRelcacheEntry(RelFileNode rnode)
         FakeRelCacheEntry fakeentry;
         Relation        rel;
  
+       Assert(InRecovery);
+
         /* Allocate the Relation struct and all related space in one block. */
         fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
         rel = (Relation) fakeentry;
  
         rel->rd_rel = &fakeentry->pgc;
         rel->rd_node = rnode;
-
-       /*
-        * We will never be working with temp rels during recovery or while
-        * syncing WAL-skipped files.
-        */
+       /* We will never be working with temp rels during recovery */
         rel->rd_backend = InvalidBackendId;
  
-       /* It must be a permanent table here */
+       /* It must be a permanent table if we're in recovery. */
         rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
  
         /* We don't know the name of the relation; use relfilenode instead */
@@ -570,9 +566,9 @@ CreateFakeRelcacheEntry(RelFileNode rnode)
         /*
          * We set up the lockRelId in case anything tries to lock the dummy
          * relation.  Note that this is fairly bogus since relNode may be
-        * different from the relation's OID.  It shouldn't really matter though.
-        * In recovery, we are running by ourselves and can't have any lock
-        * conflicts.  While syncing, we already hold AccessExclusiveLock.
+        * different from the relation's OID.  It shouldn't really matter though,
+        * since we are presumably running by ourselves and can't have any lock
+        * conflicts ...
          */
         rel->rd_lockInfo.lockRelId.dbId = rnode.dbNode;
         rel->rd_lockInfo.lockRelId.relId = rnode.relNode;
diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y

index 8663d4389462c417ac5b133f8ba9ee479ffdc2be..d8d1b06ff0a4f7911be75320f090b20e6df759e7 100644 (file)
--- a/src/backend/bootstrap/bootparse.y
+++ b/src/backend/bootstrap/bootparse.y
@@ -299,8 +299,6 @@ Boot_DeclareIndexStmt:
                                         stmt->idxcomment = NULL;
                                         stmt->indexOid = InvalidOid;
                                         stmt->oldNode = InvalidOid;
-                                       stmt->oldCreateSubid = InvalidSubTransactionId;
-                                       stmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
                                         stmt->unique = false;
                                         stmt->primary = false;
                                         stmt->isconstraint = false;
@@ -344,8 +342,6 @@ Boot_DeclareUniqueIndexStmt:
                                         stmt->idxcomment = NULL;
                                         stmt->indexOid = InvalidOid;
                                         stmt->oldNode = InvalidOid;
-                                       stmt->oldCreateSubid = InvalidSubTransactionId;
-                                       stmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
                                         stmt->unique = true;
                                         stmt->primary = false;
                                         stmt->isconstraint = false;
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c

index 2e7b36b61c5ad8470dc34efc843709bf948d51e1..d4440c9d1dbeb47235eb8362233b1c034f1dcf62 100644 (file)
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -27,16 +27,11 @@
  #include "catalog/catalog.h"
  #include "catalog/storage.h"
  #include "catalog/storage_xlog.h"
-#include "miscadmin.h"
  #include "storage/freespace.h"
  #include "storage/smgr.h"
-#include "utils/hsearch.h"
  #include "utils/memutils.h"
  #include "utils/rel.h"
  
-/* GUC variables */
-int                    wal_skip_threshold = 2048;      /* in kilobytes */
-
  /*
   * We keep a list of all relations (represented as RelFileNode values)
   * that have been created or deleted in the current transaction.  When
@@ -66,14 +61,7 @@ typedef struct PendingRelDelete
         struct PendingRelDelete *next;          /* linked-list link */
  } PendingRelDelete;
  
-typedef struct pendingSync
-{
-       RelFileNode rnode;
-       bool            is_truncated;   /* Has the file experienced truncation? */
-} pendingSync;
-
  static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
-HTAB      *pendingSyncHash = NULL;
  
  /*
   * RelationCreateStorage
@@ -128,37 +116,6 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
         pending->nestLevel = GetCurrentTransactionNestLevel();
         pending->next = pendingDeletes;
         pendingDeletes = pending;
-
-       /*
-        * Queue an at-commit sync.  Bootstrap does not need syncs, because initdb
-        * syncs at the end.  During bootstrap, mdexists() creates the specified
-        * file; smgrDoPendingSyncs() would not cope with that.
-        */
-       if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded() &&
-               !IsBootstrapProcessingMode())
-       {
-               pendingSync *pending;
-               bool            found;
-
-               /* we sync only permanent relations */
-               Assert(backend == InvalidBackendId);
-
-               if (!pendingSyncHash)
-               {
-                       HASHCTL         ctl;
-
-                       ctl.keysize = sizeof(RelFileNode);
-                       ctl.entrysize = sizeof(pendingSync);
-                       ctl.hcxt = TopTransactionContext;
-                       pendingSyncHash =
-                               hash_create("pending sync hash",
-                                                       16, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
-               }
-
-               pending = hash_search(pendingSyncHash, &rnode, HASH_ENTER, &found);
-               Assert(!found);
-               pending->is_truncated = false;
-       }
  }
  
  /*
@@ -292,8 +249,6 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
         if (vm)
                 visibilitymap_truncate(rel, nblocks);
  
-       RelationPreTruncate(rel);
-
         /*
          * We WAL-log the truncation before actually truncating, which means
          * trouble if the truncation fails. If we then crash, the WAL replay
@@ -335,49 +290,6 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
         smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
  }
  
-/*
- * RelationPreTruncate
- *             Perform AM-independent work before a physical truncation.
- *
- * If an access method's relation_nontransactional_truncate does not call
- * RelationTruncate(), it must call this before decreasing the table size.
- */
-void
-RelationPreTruncate(Relation rel)
-{
-       pendingSync *pending;
-
-       if (!pendingSyncHash)
-               return;
-       RelationOpenSmgr(rel);
-
-       pending = hash_search(pendingSyncHash, &(rel->rd_smgr->smgr_rnode.node),
-                                                 HASH_FIND, NULL);
-       if (pending)
-               pending->is_truncated = true;
-}
-
-/*
- * RelFileNodeSkippingWAL - check if a BM_PERMANENT relfilenode is using WAL
- *
- *   Changes of certain relfilenodes must not write WAL; see "Skipping WAL for
- *   New RelFileNode" in src/backend/access/transam/README.  Though it is
- *   known from Relation efficiently, this function is intended for the code
- *   paths not having access to Relation.
- */
-bool
-RelFileNodeSkippingWAL(RelFileNode rnode)
-{
-       if (XLogIsNeeded())
-               return false;                   /* no permanent relfilenode skips WAL */
-
-       if (!pendingSyncHash ||
-               hash_search(pendingSyncHash, &rnode, HASH_FIND, NULL) == NULL)
-               return false;
-
-       return true;
-}
-
  /*
   *     smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
   *
@@ -455,144 +367,6 @@ smgrDoPendingDeletes(bool isCommit)
         }
  }
  
-/*
- *     smgrDoPendingSyncs() -- Take care of relation syncs at end of xact.
- */
-void
-smgrDoPendingSyncs(bool isCommit)
-{
-       PendingRelDelete *pending;
-       int                     nrels = 0,
-                               maxrels = 0;
-       SMgrRelation *srels = NULL;
-       HASH_SEQ_STATUS scan;
-       pendingSync *pendingsync;
-
-       if (XLogIsNeeded())
-               return;                                 /* no relation can use this */
-
-       Assert(GetCurrentTransactionNestLevel() == 1);
-
-       if (!pendingSyncHash)
-               return;                                 /* no relation needs sync */
-
-       /* Just throw away all pending syncs if any at rollback */
-       if (!isCommit)
-       {
-               pendingSyncHash = NULL;
-               return;
-       }
-
-       AssertPendingSyncs_RelationCache();
-
-       /* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
-       for (pending = pendingDeletes; pending != NULL; pending = pending->next)
-       {
-               if (!pending->atCommit)
-                       continue;
-
-               (void) hash_search(pendingSyncHash, (void *) &pending->relnode,
-                                                  HASH_REMOVE, NULL);
-       }
-
-       hash_seq_init(&scan, pendingSyncHash);
-       while ((pendingsync = (pendingSync *) hash_seq_search(&scan)))
-       {
-               ForkNumber      fork;
-               BlockNumber nblocks[MAX_FORKNUM + 1];
-               BlockNumber total_blocks = 0;
-               SMgrRelation srel;
-
-               srel = smgropen(pendingsync->rnode, InvalidBackendId);
-
-               /*
-                * We emit newpage WAL records for smaller relations.
-                *
-                * Small WAL records have a chance to be emitted along with other
-                * backends' WAL records.  We emit WAL records instead of syncing for
-                * files that are smaller than a certain threshold, expecting faster
-                * commit.  The threshold is defined by the GUC wal_skip_threshold.
-                */
-               if (!pendingsync->is_truncated)
-               {
-                       for (fork = 0; fork <= MAX_FORKNUM; fork++)
-                       {
-                               if (smgrexists(srel, fork))
-                               {
-                                       BlockNumber n = smgrnblocks(srel, fork);
-
-                                       /* we shouldn't come here for unlogged relations */
-                                       Assert(fork != INIT_FORKNUM);
-                                       nblocks[fork] = n;
-                                       total_blocks += n;
-                               }
-                               else
-                                       nblocks[fork] = InvalidBlockNumber;
-                       }
-               }
-
-               /*
-                * Sync file or emit WAL records for its contents.
-                *
-                * Although we emit WAL record if the file is small enough, do file
-                * sync regardless of the size if the file has experienced a
-                * truncation. It is because the file would be followed by trailing
-                * garbage blocks after a crash recovery if, while a past longer file
-                * had been flushed out, we omitted syncing-out of the file and
-                * emitted WAL instead.  You might think that we could choose WAL if
-                * the current main fork is longer than ever, but there's a case where
-                * main fork is longer than ever but FSM fork gets shorter.
-                */
-               if (pendingsync->is_truncated ||
-                       total_blocks * BLCKSZ / 1024 >= wal_skip_threshold)
-               {
-                       /* allocate the initial array, or extend it, if needed */
-                       if (maxrels == 0)
-                       {
-                               maxrels = 8;
-                               srels = palloc(sizeof(SMgrRelation) * maxrels);
-                       }
-                       else if (maxrels <= nrels)
-                       {
-                               maxrels *= 2;
-                               srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
-                       }
-
-                       srels[nrels++] = srel;
-               }
-               else
-               {
-                       /* Emit WAL records for all blocks.  The file is small enough. */
-                       for (fork = 0; fork <= MAX_FORKNUM; fork++)
-                       {
-                               int                     n = nblocks[fork];
-                               Relation        rel;
-
-                               if (!BlockNumberIsValid(n))
-                                       continue;
-
-                               /*
-                                * Emit WAL for the whole file.  Unfortunately we don't know
-                                * what kind of a page this is, so we have to log the full
-                                * page including any unused space.  ReadBufferExtended()
-                                * counts some pgstat events; unfortunately, we discard them.
-                                */
-                               rel = CreateFakeRelcacheEntry(srel->smgr_rnode.node);
-                               log_newpage_range(rel, fork, 0, n, false);
-                               FreeFakeRelcacheEntry(rel);
-                       }
-               }
-       }
-
-       pendingSyncHash = NULL;
-
-       if (nrels > 0)
-       {
-               smgrdosyncall(srels, nrels);
-               pfree(srels);
-       }
-}
-
  /*
   * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
   *
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c

index 0cfb2338aaa6d78d5bca59c73f4271300c272383..1c90f8c3382838be89ffd94e3688325019614382 100644 (file)
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -747,6 +747,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
         bool       *isnull;
         IndexScanDesc indexScan;
         HeapScanDesc heapScan;
+       bool            use_wal;
         bool            is_system_catalog;
         TransactionId OldestXmin;
         TransactionId FreezeXid;
@@ -802,9 +803,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
                 LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
  
         /*
-        * Valid smgr_targblock implies something already wrote to the relation.
-        * This may be harmless, but this function hasn't planned for it.
+        * We need to log the copied data in WAL iff WAL archiving/streaming is
+        * enabled AND it's a WAL-logged rel.
          */
+       use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
+
+       /* use_wal off requires smgr_targblock be initially invalid */
         Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
  
         /*
@@ -872,7 +876,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
  
         /* Initialize the rewrite operation */
         rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
-                                                                MultiXactCutoff);
+                                                                MultiXactCutoff, use_wal);
  
         /*
          * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
@@ -1242,25 +1246,6 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
                 *mapped_tables++ = r2;
         }
  
-       /*
-        * Recognize that rel1's relfilenode (swapped from rel2) is new in this
-        * subtransaction. The rel2 storage (swapped from rel1) may or may not be
-        * new.
-        */
-       {
-               Relation        rel1,
-                                       rel2;
-
-               rel1 = relation_open(r1, NoLock);
-               rel2 = relation_open(r2, NoLock);
-               rel2->rd_createSubid = rel1->rd_createSubid;
-               rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
-               rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
-               RelationAssumeNewRelfilenode(rel1);
-               relation_close(rel1, NoLock);
-               relation_close(rel2, NoLock);
-       }
-
         /*
          * In the case of a shared catalog, these next few steps will only affect
          * our own database's pg_class row; but that's okay, because they are all
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c

index f053f70c6e2d19f3ce4ebdc2b91bdbade2b578c0..9276f775c412c8209fba903ef6dcc9b51e5244db 100644 (file)
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2274,14 +2274,49 @@ CopyFrom(CopyState cstate)
  
         tupDesc = RelationGetDescr(cstate->rel);
  
-       /*
-        * If the target file is new-in-transaction, we assume that checking FSM
-        * for free space is a waste of time.  This could possibly be wrong, but
-        * it's unlikely.
+       /*----------
+        * Check to see if we can avoid writing WAL
+        *
+        * If archive logging/streaming is not enabled *and* either
+        *      - table was created in same transaction as this COPY
+        *      - data is being written to relfilenode created in this transaction
+        * then we can skip writing WAL.  It's safe because if the transaction
+        * doesn't commit, we'll discard the table (or the new relfilenode file).
+        * If it does commit, we'll have done the heap_sync at the bottom of this
+        * routine first.
+        *
+        * As mentioned in comments in utils/rel.h, the in-same-transaction test
+        * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
+        * can be cleared before the end of the transaction. The exact case is
+        * when a relation sets a new relfilenode twice in same transaction, yet
+        * the second one fails in an aborted subtransaction, e.g.
+        *
+        * BEGIN;
+        * TRUNCATE t;
+        * SAVEPOINT save;
+        * TRUNCATE t;
+        * ROLLBACK TO save;
+        * COPY ...
+        *
+        * Also, if the target file is new-in-transaction, we assume that checking
+        * FSM for free space is a waste of time, even if we must use WAL because
+        * of archiving.  This could possibly be wrong, but it's unlikely.
+        *
+        * The comments for heap_insert and RelationGetBufferForTuple specify that
+        * skipping WAL logging is only safe if we ensure that our tuples do not
+        * go into pages containing tuples from any other transactions --- but this
+        * must be the case if we have a new table or new relfilenode, so we need
+        * no additional work to enforce that.
+        *----------
          */
+       /* createSubid is creation check, newRelfilenodeSubid is truncation check */
         if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
-               cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
+               cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+       {
                 hi_options |= HEAP_INSERT_SKIP_FSM;
+               if (!XLogIsNeeded())
+                       hi_options |= HEAP_INSERT_SKIP_WAL;
+       }
  
         /*
          * Optimize if new relfilenode was created in this subxact or one of its
@@ -2540,6 +2575,13 @@ CopyFrom(CopyState cstate)
  
         FreeExecutorState(estate);
  
+       /*
+        * If we skipped writing WAL, then we need to sync the heap (but not
+        * indexes since those use WAL anyway)
+        */
+       if (hi_options & HEAP_INSERT_SKIP_WAL)
+               heap_sync(cstate->rel);
+
         return processed;
  }
  
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c

index 588a5acc108ff38a04ba9052473a61fbbe5e84dd..e75b4fd65fed11e91074990c24ffda2e5c2e15b4 100644 (file)
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -561,13 +561,16 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
         myState->rel = intoRelationDesc;
         myState->reladdr = intoRelationAddr;
         myState->output_cid = GetCurrentCommandId(true);
-       myState->hi_options = HEAP_INSERT_SKIP_FSM;
-       myState->bistate = GetBulkInsertState();
  
         /*
-        * Valid smgr_targblock implies something already wrote to the relation.
-        * This may be harmless, but this function hasn't planned for it.
+        * We can skip WAL-logging the insertions, unless PITR or streaming
+        * replication is in use. We can skip the FSM in any case.
          */
+       myState->hi_options = HEAP_INSERT_SKIP_FSM |
+               (XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
+       myState->bistate = GetBulkInsertState();
+
+       /* Not using WAL requires smgr_targblock be initially invalid */
         Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
  }
  
@@ -611,6 +614,10 @@ intorel_shutdown(DestReceiver *self)
  
         FreeBulkInsertState(myState->bistate);
  
+       /* If we skipped using WAL, must heap_sync before commit */
+       if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
+               heap_sync(myState->rel);
+
         /* close rel, but keep lock until commit */
         heap_close(myState->rel, NoLock);
         myState->rel = NULL;
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c

index 18492e763e01c049d504bacfd44eed56d28cb452..ea6321fc13243443bdfb0cf578dcd8e9c642b546 100644 (file)
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -403,13 +403,17 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
          */
         myState->transientrel = transientrel;
         myState->output_cid = GetCurrentCommandId(true);
-       myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;
-       myState->bistate = GetBulkInsertState();
  
         /*
-        * Valid smgr_targblock implies something already wrote to the relation.
-        * This may be harmless, but this function hasn't planned for it.
+        * We can skip WAL-logging the insertions, unless PITR or streaming
+        * replication is in use. We can skip the FSM in any case.
          */
+       myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;
+       if (!XLogIsNeeded())
+               myState->hi_options |= HEAP_INSERT_SKIP_WAL;
+       myState->bistate = GetBulkInsertState();
+
+       /* Not using WAL requires smgr_targblock be initially invalid */
         Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber);
  }
  
@@ -447,6 +451,10 @@ transientrel_shutdown(DestReceiver *self)
  
         FreeBulkInsertState(myState->bistate);
  
+       /* If we skipped using WAL, must heap_sync before commit */
+       if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
+               heap_sync(myState->transientrel);
+
         /* close transientrel, but keep lock until commit */
         heap_close(myState->transientrel, NoLock);
         myState->transientrel = NULL;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c

index 54cbbd4a4beaa3bf7b5d427aeaa0324e13adf497..820041175221d80e40d1d9f2ee7ba5ae4a4e0dc2 100644 (file)
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -4013,14 +4013,19 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
                 newrel = NULL;
  
         /*
-        * Prepare a BulkInsertState and options for heap_insert.  The FSM is
-        * empty, so don't bother using it.
+        * Prepare a BulkInsertState and options for heap_insert. Because we're
+        * building a new heap, we can skip WAL-logging and fsync it to disk at
+        * the end instead (unless WAL-logging is required for archiving or
+        * streaming replication). The FSM is empty too, so don't bother using it.
          */
         if (newrel)
         {
                 mycid = GetCurrentCommandId(true);
                 bistate = GetBulkInsertState();
+
                 hi_options = HEAP_INSERT_SKIP_FSM;
+               if (!XLogIsNeeded())
+                       hi_options |= HEAP_INSERT_SKIP_WAL;
         }
         else
         {
@@ -4270,6 +4275,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
         {
                 FreeBulkInsertState(bistate);
  
+               /* If we skipped writing WAL, then we need to sync the heap. */
+               if (hi_options & HEAP_INSERT_SKIP_WAL)
+                       heap_sync(newrel);
+
                 heap_close(newrel, NoLock);
         }
  }
@@ -5949,19 +5958,14 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
  
         /*
          * If TryReuseIndex() stashed a relfilenode for us, we used it for the new
-        * index instead of building from scratch.  Restore associated fields.
-        * This may store InvalidSubTransactionId in both fields, in which case
-        * relcache.c will assume it can rebuild the relcache entry.  Hence, do
-        * this after the CCI that made catalog rows visible to any rebuild.  The
-        * DROP of the old edition of this index will have scheduled the storage
-        * for deletion at commit, so cancel that pending deletion.
+        * index instead of building from scratch.  The DROP of the old edition of
+        * this index will have scheduled the storage for deletion at commit, so
+        * cancel that pending deletion.
          */
         if (OidIsValid(stmt->oldNode))
         {
                 Relation        irel = index_open(address.objectId, NoLock);
  
-               irel->rd_createSubid = stmt->oldCreateSubid;
-               irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
                 RelationPreserveStorage(irel->rd_node, true);
                 index_close(irel, NoLock);
         }
@@ -9100,8 +9104,6 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt)
                 Relation        irel = index_open(oldId, NoLock);
  
                 stmt->oldNode = irel->rd_node.relNode;
-               stmt->oldCreateSubid = irel->rd_createSubid;
-               stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
                 index_close(irel, NoLock);
         }
  }
@@ -9947,8 +9949,6 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
  
         heap_close(pg_class, RowExclusiveLock);
  
-       RelationAssumeNewRelfilenode(rel);
-
         relation_close(rel, NoLock);
  
         /* Make sure the reltablespace change is visible */
@@ -10163,9 +10163,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
  
         /*
          * We need to log the copied data in WAL iff WAL archiving/streaming is
-        * enabled AND it's a permanent relation.  This gives the same answer as
-        * "RelationNeedsWAL(rel) || copying_initfork", because we know the
-        * current operation created a new relfilenode.
+        * enabled AND it's a permanent relation.
          */
         use_wal = XLogIsNeeded() &&
                 (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
@@ -10207,15 +10205,21 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
         }
  
         /*
-        * When we WAL-logged rel pages, we must nonetheless fsync them.  The
-        * reason is that since we're copying outside shared buffers, a CHECKPOINT
-        * occurring during the copy has no way to flush the previously written
-        * data to disk (indeed it won't know the new rel even exists).  A crash
-        * later on would replay WAL from the checkpoint, therefore it wouldn't
-        * replay our earlier WAL entries. If we do not fsync those pages here,
-        * they might still not be on disk when the crash occurs.
-        */
-       if (use_wal || copying_initfork)
+        * If the rel is WAL-logged, must fsync before commit.  We do that by
+        * calling smgrimmedsync() on the destination fork just copied.  (For a
+        * temp or unlogged rel we don't care since the data will be gone after
+        * a crash anyway.)
+        *
+        * It's obvious that we must do this when not WAL-logging the copy. It's
+        * less obvious that we have to do it even if we did WAL-log the copied
+        * pages. The reason is that since we're copying outside shared buffers, a
+        * CHECKPOINT occurring during the copy has no way to flush the previously
+        * written data to disk (indeed it won't know the new rel even exists).  A
+        * crash later on would replay WAL from the checkpoint, therefore it
+        * wouldn't replay our earlier WAL entries. If we do not fsync those pages
+        * here, they might still not be on disk when the crash occurs.
+        */
+       if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
                 smgrimmedsync(dst, forkNum);
  }
  
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index bd122c292e8e47341fb73f489f5329f1dc1154ce..db492a7dd1db62e3c301c648c70f5c846acf9715 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3091,8 +3091,6 @@ _copyIndexStmt(const IndexStmt *from)
         COPY_STRING_FIELD(idxcomment);
         COPY_SCALAR_FIELD(indexOid);
         COPY_SCALAR_FIELD(oldNode);
-       COPY_SCALAR_FIELD(oldCreateSubid);
-       COPY_SCALAR_FIELD(oldFirstRelfilenodeSubid);
         COPY_SCALAR_FIELD(unique);
         COPY_SCALAR_FIELD(primary);
         COPY_SCALAR_FIELD(isconstraint);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index d1cf106257bf8258547f186d29eab3c0cfae0df4..e32b96eb1e24f9fd85fb0d566906f86e9953c539 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1236,8 +1236,6 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b)
         COMPARE_STRING_FIELD(idxcomment);
         COMPARE_SCALAR_FIELD(indexOid);
         COMPARE_SCALAR_FIELD(oldNode);
-       COMPARE_SCALAR_FIELD(oldCreateSubid);
-       COMPARE_SCALAR_FIELD(oldFirstRelfilenodeSubid);
         COMPARE_SCALAR_FIELD(unique);
         COMPARE_SCALAR_FIELD(primary);
         COMPARE_SCALAR_FIELD(isconstraint);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index b062725eabb2f28bf273b5ec49804c47cdbd0874..8d9280197bb5cf2c939b648b4c31cbcc369f451d 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2143,8 +2143,6 @@ _outIndexStmt(StringInfo str, const IndexStmt *node)
         WRITE_STRING_FIELD(idxcomment);
         WRITE_OID_FIELD(indexOid);
         WRITE_OID_FIELD(oldNode);
-       WRITE_UINT_FIELD(oldCreateSubid);
-       WRITE_UINT_FIELD(oldFirstRelfilenodeSubid);
         WRITE_BOOL_FIELD(unique);
         WRITE_BOOL_FIELD(primary);
         WRITE_BOOL_FIELD(isconstraint);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index 5a3a2a09b6431aeb02819bc15c8639ee8ec26a66..96910cf6edd81c52e1ae63594b05023c5c8c3836 100644 (file)
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -6611,8 +6611,6 @@ IndexStmt:        CREATE opt_unique INDEX opt_concurrently opt_index_name
                                         n->idxcomment = NULL;
                                         n->indexOid = InvalidOid;
                                         n->oldNode = InvalidOid;
-                                       n->oldCreateSubid = InvalidSubTransactionId;
-                                       n->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
                                         n->primary = false;
                                         n->isconstraint = false;
                                         n->deferrable = false;
@@ -6639,8 +6637,6 @@ IndexStmt:        CREATE opt_unique INDEX opt_concurrently opt_index_name
                                         n->idxcomment = NULL;
                                         n->indexOid = InvalidOid;
                                         n->oldNode = InvalidOid;
-                                       n->oldCreateSubid = InvalidSubTransactionId;
-                                       n->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
                                         n->primary = false;
                                         n->isconstraint = false;
                                         n->deferrable = false;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c

index ac50c3a066dc6d0503449bc9de49bec473868019..5b84566d08cffd1f95a8d39657193434758c68fe 100644 (file)
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1086,8 +1086,6 @@ generateClonedIndexStmt(CreateStmtContext *cxt, Relation source_idx,
         index->idxcomment = NULL;
         index->indexOid = InvalidOid;
         index->oldNode = InvalidOid;
-       index->oldCreateSubid = InvalidSubTransactionId;
-       index->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
         index->unique = idxrec->indisunique;
         index->primary = idxrec->indisprimary;
         index->transformed = true;      /* don't need transformIndexStmt */
@@ -1552,8 +1550,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt)
         index->idxcomment = NULL;
         index->indexOid = InvalidOid;
         index->oldNode = InvalidOid;
-       index->oldCreateSubid = InvalidSubTransactionId;
-       index->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
         index->transformed = false;
         index->concurrent = false;
         index->if_not_exists = false;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c

index adae695e5e842aa0a2b6b6c838f382eb44be99e4..ddd92e75a01bcbc0176fadbad33f4997aedc9863 100644 (file)
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -64,7 +64,7 @@
  #define BUF_WRITTEN                            0x01
  #define BUF_REUSABLE                   0x02
  
-#define RELS_BSEARCH_THRESHOLD         20
+#define DROP_RELS_BSEARCH_THRESHOLD            20
  
  typedef struct PrivateRefCountEntry
  {
@@ -75,19 +75,6 @@ typedef struct PrivateRefCountEntry
  /* 64 bytes, about the size of a cache line on common systems */
  #define REFCOUNT_ARRAY_ENTRIES 8
  
-/*
- * Type for array used to sort SMgrRelations
- *
- * FlushRelationsAllBuffers shares the same comparator function with
- * DropRelFileNodesAllBuffers. Pointer to this struct and RelFileNode must be
- * compatible.
- */
-typedef struct SMgrSortArray
-{
-       RelFileNode rnode;                      /* This must be the first member */
-       SMgrRelation srel;
-} SMgrSortArray;
-
  /* GUC variables */
  bool           zero_damaged_pages = false;
  int                    bgwriter_lru_maxpages = 100;
@@ -2651,7 +2638,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
          * an exactly determined value, as it depends on many factors (CPU and RAM
          * speeds, amount of shared buffers etc.).
          */
-       use_bsearch = n > RELS_BSEARCH_THRESHOLD;
+       use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
  
         /* sort the list of rnodes if necessary */
         if (use_bsearch)
@@ -2893,103 +2880,6 @@ FlushRelationBuffers(Relation rel)
         }
  }
  
-/* ---------------------------------------------------------------------
- *             FlushRelationsAllBuffers
- *
- *             This function flushes out of the buffer pool all the pages of all
- *             forks of the specified smgr relations.  It's equivalent to calling
- *             FlushRelationBuffers once per fork per relation.  The relations are
- *             assumed not to use local buffers.
- * --------------------------------------------------------------------
- */
-void
-FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
-{
-       int                     i;
-       SMgrSortArray *srels;
-       bool            use_bsearch;
-
-       if (nrels == 0)
-               return;
-
-       /* fill-in array for qsort */
-       srels = palloc(sizeof(SMgrSortArray) * nrels);
-
-       for (i = 0; i < nrels; i++)
-       {
-               Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
-
-               srels[i].rnode = smgrs[i]->smgr_rnode.node;
-               srels[i].srel = smgrs[i];
-       }
-
-       /*
-        * Save the bsearch overhead for low number of relations to sync. See
-        * DropRelFileNodesAllBuffers for details.
-        */
-       use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
-
-       /* sort the list of SMgrRelations if necessary */
-       if (use_bsearch)
-               pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
-
-       /* Make sure we can handle the pin inside the loop */
-       ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
-       for (i = 0; i < NBuffers; i++)
-       {
-               SMgrSortArray *srelent = NULL;
-               BufferDesc *bufHdr = GetBufferDescriptor(i);
-
-               /*
-                * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
-                * and saves some cycles.
-                */
-
-               if (!use_bsearch)
-               {
-                       int                     j;
-
-                       for (j = 0; j < nrels; j++)
-                       {
-                               if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
-                               {
-                                       srelent = &srels[j];
-                                       break;
-                               }
-                       }
-
-               }
-               else
-               {
-                       srelent = bsearch((const void *) &(bufHdr->tag.rnode),
-                                                         srels, nrels, sizeof(SMgrSortArray),
-                                                         rnode_comparator);
-               }
-
-               /* buffer doesn't belong to any of the given relfilenodes; skip it */
-               if (srelent == NULL)
-                       continue;
-
-               ReservePrivateRefCountEntry();
-
-               LockBufHdr(bufHdr);
-               if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
-                       (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
-               {
-                       PinBuffer_Locked(bufHdr);
-                       LWLockAcquire(bufHdr->content_lock, LW_SHARED);
-                       FlushBuffer(bufHdr, srelent->srel);
-                       LWLockRelease(bufHdr->content_lock);
-                       UnpinBuffer(bufHdr, true);
-               }
-               else
-                       UnlockBufHdr(bufHdr);
-       }
-
-       pfree(srels);
-}
-
  /* ---------------------------------------------------------------------
   *             FlushDatabaseBuffers
   *
@@ -3187,15 +3077,13 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
                 if (XLogHintBitIsNeeded() && (bufHdr->flags & BM_PERMANENT))
                 {
                         /*
-                        * If we must not write WAL, due to a relfilenode-specific
-                        * condition or being in recovery, don't dirty the page.  We can
-                        * set the hint, just not dirty the page as a result so the hint
-                        * is lost when we evict the page or shutdown.
+                        * If we're in recovery we cannot dirty a page because of a hint.
+                        * We can set the hint, just not dirty the page as a result so the
+                        * hint is lost when we evict the page or shutdown.
                          *
                          * See src/backend/storage/page/README for longer discussion.
                          */
-                       if (RecoveryInProgress() ||
-                               RelFileNodeSkippingWAL(bufHdr->tag.rnode))
+                       if (RecoveryInProgress())
                                 return;
  
                         /*
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c

index 801542a856c817107c090ffe58843277cc95d330..95093c47e3ca463bf89e256cc3ce3a5e4ff19988 100644 (file)
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -547,18 +547,6 @@ DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2)
         return false;
  }
  
-#ifdef USE_ASSERT_CHECKING
-/*
- * GetLockMethodLocalHash -- return the hash of local locks, for modules that
- *             evaluate assertions based on all locks held.
- */
-HTAB *
-GetLockMethodLocalHash(void)
-{
-       return LockMethodLocalHash;
-}
-#endif
-
  /*
   * LockHasWaiters -- look up 'locktag' and check if releasing this
   *             lock would wake up other processes waiting for it.
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c

index 1358f81e3fc317a2e6192526055075edc078744d..58a6e0f4ddcc9ea4ac011835ad3b1184d7558863 100644 (file)
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -352,10 +352,11 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
   * During replay, we would delete the file and then recreate it, which is fine
   * if the contents of the file were repopulated by subsequent WAL entries.
   * But if we didn't WAL-log insertions, but instead relied on fsyncing the
- * file after populating it (as we do at wal_level=minimal), the contents of
- * the file would be lost forever.  By leaving the empty file until after the
- * next checkpoint, we prevent reassignment of the relfilenode number until
- * it's safe, because relfilenode assignment skips over any existing file.
+ * file after populating it (as for instance CLUSTER and CREATE INDEX do),
+ * the contents of the file would be lost forever.  By leaving the empty file
+ * until after the next checkpoint, we prevent reassignment of the relfilenode
+ * number until it's safe, because relfilenode assignment skips over any
+ * existing file.
   *
   * We do not need to go through this dance for temp relations, though, because
   * we never make WAL entries for temp rels, and so a temp rel poses no threat
@@ -960,19 +961,12 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
   *     mdimmedsync() -- Immediately sync a relation to stable storage.
   *
   * Note that only writes already issued are synced; this routine knows
- * nothing of dirty buffers that may exist inside the buffer manager.  We
- * sync active and inactive segments; smgrDoPendingSyncs() relies on this.
- * Consider a relation skipping WAL.  Suppose a checkpoint syncs blocks of
- * some segment, then mdtruncate() renders that segment inactive.  If we
- * crash before the next checkpoint syncs the newly-inactive segment, that
- * segment may survive recovery, reintroducing unwanted data into the table.
+ * nothing of dirty buffers that may exist inside the buffer manager.
   */
  void
  mdimmedsync(SMgrRelation reln, ForkNumber forknum)
  {
         MdfdVec    *v;
-       BlockNumber segno = 0;
-       bool            active = true;
  
         /*
          * NOTE: mdnblocks makes sure we have opened all active segments, so that
@@ -982,42 +976,14 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
  
         v = mdopen(reln, forknum, EXTENSION_FAIL);
  
-       /*
-        * Temporarily open inactive segments, then close them after sync.  There
-        * may be some inactive segments left opened after fsync() error, but that
-        * is harmless.  We don't bother to clean them up and take a risk of
-        * further trouble.  The next mdclose() will soon close them.
-        */
         while (v != NULL)
         {
-               File            vfd = v->mdfd_vfd;
-
-               if (active)
-                       v = v->mdfd_chain;
-               else
-               {
-                       Assert(v->mdfd_chain == NULL);
-                       pfree(v);
-                       v = NULL;
-               }
-
-               if (FileSync(vfd) < 0)
+               if (FileSync(v->mdfd_vfd) < 0)
                         ereport(data_sync_elevel(ERROR),
                                         (errcode_for_file_access(),
                                          errmsg("could not fsync file \"%s\": %m",
-                                                       FilePathName(vfd))));
-
-               /* Close inactive segments immediately */
-               if (!active)
-                       FileClose(vfd);
-
-               segno++;
-
-               if (v == NULL)
-               {
-                       v = _mdfd_openseg(reln, forknum, segno, 0);
-                       active = false;
-               }
+                                                       FilePathName(v->mdfd_vfd))));
+               v = v->mdfd_chain;
         }
  }
  
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c

index 802732e25317737a3d188530101e60a684021c10..c3da4277418f1cf0ecbfbadf8ed0b90f9c01fe7e 100644 (file)
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -406,41 +406,6 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
         (*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo);
  }
  
-/*
- *     smgrdosyncall() -- Immediately sync all forks of all given relations
- *
- *             All forks of all given relations are synced out to the store.
- *
- *             This is equivalent to FlushRelationBuffers() for each smgr relation,
- *             then calling smgrimmedsync() for all forks of each relation, but it's
- *             significantly quicker so should be preferred when possible.
- */
-void
-smgrdosyncall(SMgrRelation *rels, int nrels)
-{
-       int                     i = 0;
-       ForkNumber      forknum;
-
-       if (nrels == 0)
-               return;
-
-       FlushRelationsAllBuffers(rels, nrels);
-
-       /*
-        * Sync the physical file(s).
-        */
-       for (i = 0; i < nrels; i++)
-       {
-               int                     which = rels[i]->smgr_which;
-
-               for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-               {
-                       if (smgrsw[which].smgr_exists(rels[i], forknum))
-                               smgrsw[which].smgr_immedsync(rels[i], forknum);
-               }
-       }
-}
-
  /*
   *     smgrdounlinkall() -- Immediately unlink all forks of all given relations
   *
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c

index 86f97bf7f54a0d8983f82ca0378715b9d4d23c5e..6ec89fadd6cb6ab0bdf28e3a10ad792b45792684 100644 (file)
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -243,9 +243,6 @@ static void RelationReloadIndexInfo(Relation relation);
  static void RelationReloadNailed(Relation relation);
  static void RelationFlushRelation(Relation relation);
  static void RememberToFreeTupleDescAtEOX(TupleDesc td);
-#ifdef USE_ASSERT_CHECKING
-static void AssertPendingSyncConsistency(Relation relation);
-#endif
  static void AtEOXact_cleanup(Relation relation, bool isCommit);
  static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
                                         SubTransactionId mySubid, SubTransactionId parentSubid);
@@ -985,8 +982,6 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
         relation->rd_isnailed = false;
         relation->rd_createSubid = InvalidSubTransactionId;
         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-       relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-       relation->rd_droppedSubid = InvalidSubTransactionId;
         switch (relation->rd_rel->relpersistence)
         {
                 case RELPERSISTENCE_UNLOGGED:
@@ -1584,8 +1579,6 @@ formrdesc(const char *relationName, Oid relationReltype,
         relation->rd_isnailed = true;
         relation->rd_createSubid = InvalidSubTransactionId;
         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-       relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-       relation->rd_droppedSubid = InvalidSubTransactionId;
         relation->rd_backend = InvalidBackendId;
         relation->rd_islocaltemp = false;
  
@@ -1752,13 +1745,6 @@ RelationIdGetRelation(Oid relationId)
  
         if (RelationIsValid(rd))
         {
-               /* return NULL for dropped relations */
-               if (rd->rd_droppedSubid != InvalidSubTransactionId)
-               {
-                       Assert(!rd->rd_isvalid);
-                       return NULL;
-               }
-
                 RelationIncrementReferenceCount(rd);
                 /* revalidate cache entry if necessary */
                 if (!rd->rd_isvalid)
@@ -1851,7 +1837,7 @@ RelationClose(Relation relation)
  #ifdef RELCACHE_FORCE_RELEASE
         if (RelationHasReferenceCountZero(relation) &&
                 relation->rd_createSubid == InvalidSubTransactionId &&
-               relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)
+               relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
                 RelationClearRelation(relation, false);
  #endif
  }
@@ -1890,10 +1876,9 @@ RelationReloadIndexInfo(Relation relation)
         HeapTuple       pg_class_tuple;
         Form_pg_class relp;
  
-       /* Should be called only for invalidated, live indexes */
+       /* Should be called only for invalidated indexes */
         Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
-                  !relation->rd_isvalid &&
-                  relation->rd_droppedSubid == InvalidSubTransactionId);
+                  !relation->rd_isvalid);
  
         /* Ensure it's closed at smgr level */
         RelationCloseSmgr(relation);
@@ -2174,13 +2159,6 @@ RelationClearRelation(Relation relation, bool rebuild)
                 return;
         }
  
-       /* Mark it invalid until we've finished rebuild */
-       relation->rd_isvalid = false;
-
-       /* See RelationForgetRelation(). */
-       if (relation->rd_droppedSubid != InvalidSubTransactionId)
-               return;
-
         /*
          * Even non-system indexes should not be blown away if they are open and
          * have valid index support information.  This avoids problems with active
@@ -2192,11 +2170,15 @@ RelationClearRelation(Relation relation, bool rebuild)
                 relation->rd_refcnt > 0 &&
                 relation->rd_indexcxt != NULL)
         {
+               relation->rd_isvalid = false;   /* needs to be revalidated */
                 if (IsTransactionState())
                         RelationReloadIndexInfo(relation);
                 return;
         }
  
+       /* Mark it invalid until we've finished rebuild */
+       relation->rd_isvalid = false;
+
         /*
          * If we're really done with the relcache entry, blow it away. But if
          * someone is still using it, reconstruct the whole deal without moving
@@ -2254,12 +2236,12 @@ RelationClearRelation(Relation relation, bool rebuild)
                  * problem.
                  *
                  * When rebuilding an open relcache entry, we must preserve ref count,
-                * rd_*Subid, and rd_toastoid state.  Also attempt to preserve the
-                * pg_class entry (rd_rel), tupledesc, and rewrite-rule substructures
-                * in place, because various places assume that these structures won't
-                * move while they are working with an open relcache entry.  (Note:
-                * the refcount mechanism for tupledescs might someday allow us to
-                * remove this hack for the tupledesc.)
+                * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state.  Also
+                * attempt to preserve the pg_class entry (rd_rel), tupledesc, and
+                * rewrite-rule substructures in place, because various places assume
+                * that these structures won't move while they are working with an
+                * open relcache entry.  (Note: the refcount mechanism for tupledescs
+                * might someday allow us to remove this hack for the tupledesc.)
                  *
                  * Note that this process does not touch CurrentResourceOwner; which
                  * is good because whatever ref counts the entry may have do not
@@ -2336,8 +2318,6 @@ RelationClearRelation(Relation relation, bool rebuild)
                 /* creation sub-XIDs must be preserved */
                 SWAPFIELD(SubTransactionId, rd_createSubid);
                 SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
-               SWAPFIELD(SubTransactionId, rd_firstRelfilenodeSubid);
-               SWAPFIELD(SubTransactionId, rd_droppedSubid);
                 /* un-swap rd_rel pointers, swap contents instead */
                 SWAPFIELD(Form_pg_class, rd_rel);
                 /* ... but actually, we don't have to update newrel->rd_rel */
@@ -2374,12 +2354,12 @@ static void
  RelationFlushRelation(Relation relation)
  {
         if (relation->rd_createSubid != InvalidSubTransactionId ||
-               relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
+               relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
         {
                 /*
                  * New relcache entries are always rebuilt, not flushed; else we'd
-                * forget the "new" status of the relation.  Ditto for the
-                * new-relfilenode status.
+                * forget the "new" status of the relation, which is a useful
+                * optimization to have.  Ditto for the new-relfilenode status.
                  *
                  * The rel could have zero refcnt here, so temporarily increment the
                  * refcnt to ensure it's safe to rebuild it.  We can assume that the
@@ -2401,7 +2381,10 @@ RelationFlushRelation(Relation relation)
  }
  
  /*
- * RelationForgetRelation - caller reports that it dropped the relation
+ * RelationForgetRelation - unconditionally remove a relcache entry
+ *
+ *                External interface for destroying a relcache entry when we
+ *                drop the relation.
   */
  void
  RelationForgetRelation(Oid rid)
@@ -2416,19 +2399,7 @@ RelationForgetRelation(Oid rid)
         if (!RelationHasReferenceCountZero(relation))
                 elog(ERROR, "relation %u is still open", rid);
  
-       Assert(relation->rd_droppedSubid == InvalidSubTransactionId);
-       if (relation->rd_createSubid != InvalidSubTransactionId ||
-               relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
-       {
-               /*
-                * In the event of subtransaction rollback, we must not forget
-                * rd_*Subid.  Mark the entry "dropped" so RelationClearRelation()
-                * invalidates it in lieu of destroying it.  (If we're in a top
-                * transaction, we could opt to destroy the entry.)
-                */
-               relation->rd_droppedSubid = GetCurrentSubTransactionId();
-       }
-
+       /* Unconditionally destroy the relcache entry */
         RelationClearRelation(relation, false);
  }
  
@@ -2468,10 +2439,11 @@ RelationCacheInvalidateEntry(Oid relationId)
   *      relation cache and re-read relation mapping data.
   *
   *      This is currently used only to recover from SI message buffer overflow,
- *      so we do not touch relations having new-in-transaction relfilenodes; they
- *      cannot be targets of cross-backend SI updates (and our own updates now go
- *      through a separate linked list that isn't limited by the SI message
- *      buffer size).
+ *      so we do not touch new-in-transaction relations; they cannot be targets
+ *      of cross-backend SI updates (and our own updates now go through a
+ *      separate linked list that isn't limited by the SI message buffer size).
+ *      Likewise, we need not discard new-relfilenode-in-transaction hints,
+ *      since any invalidation of those would be a local event.
   *
   *      We do this in two phases: the first pass deletes deletable items, and
   *      the second one rebuilds the rebuildable items.  This is essential for
@@ -2522,7 +2494,7 @@ RelationCacheInvalidate(void)
                  * pending invalidations.
                  */
                 if (relation->rd_createSubid != InvalidSubTransactionId ||
-                       relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
+                       relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
                         continue;
  
                 relcacheInvalsReceived++;
@@ -2634,96 +2606,6 @@ RememberToFreeTupleDescAtEOX(TupleDesc td)
         EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
  }
  
-#ifdef USE_ASSERT_CHECKING
-/*
- * Relation kinds that have physical storage. These relations normally have
- * relfilenode set to non-zero, but it can also be zero if the relation is
- * mapped.
- */
-#define RELKIND_HAS_STORAGE(relkind) \
-       ((relkind) == RELKIND_RELATION || \
-        (relkind) == RELKIND_INDEX || \
-        (relkind) == RELKIND_SEQUENCE || \
-        (relkind) == RELKIND_TOASTVALUE || \
-        (relkind) == RELKIND_MATVIEW)
-
-static void
-AssertPendingSyncConsistency(Relation relation)
-{
-       bool            relcache_verdict =
-       relation->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT &&
-       ((relation->rd_createSubid != InvalidSubTransactionId &&
-         RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) ||
-        relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId);
-
-       Assert(relcache_verdict == RelFileNodeSkippingWAL(relation->rd_node));
-
-       if (relation->rd_droppedSubid != InvalidSubTransactionId)
-               Assert(!relation->rd_isvalid &&
-                          (relation->rd_createSubid != InvalidSubTransactionId ||
-                               relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId));
-}
-
-/*
- * AssertPendingSyncs_RelationCache
- *
- *     Assert that relcache.c and storage.c agree on whether to skip WAL.
- */
-void
-AssertPendingSyncs_RelationCache(void)
-{
-       HASH_SEQ_STATUS status;
-       LOCALLOCK  *locallock;
-       Relation   *rels;
-       int                     maxrels;
-       int                     nrels;
-       RelIdCacheEnt *idhentry;
-       int                     i;
-
-       /*
-        * Open every relation that this transaction has locked.  If, for some
-        * relation, storage.c is skipping WAL and relcache.c is not skipping WAL,
-        * a CommandCounterIncrement() typically yields a local invalidation
-        * message that destroys the relcache entry.  By recreating such entries
-        * here, we detect the problem.
-        */
-       PushActiveSnapshot(GetTransactionSnapshot());
-       maxrels = 1;
-       rels = palloc(maxrels * sizeof(*rels));
-       nrels = 0;
-       hash_seq_init(&status, GetLockMethodLocalHash());
-       while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
-       {
-               Oid                     relid;
-               Relation        r;
-
-               if (locallock->nLocks <= 0)
-                       continue;
-               if ((LockTagType) locallock->tag.lock.locktag_type !=
-                       LOCKTAG_RELATION)
-                       continue;
-               relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2);
-               r = RelationIdGetRelation(relid);
-               if (!RelationIsValid(r))
-                       continue;
-               if (nrels >= maxrels)
-               {
-                       maxrels *= 2;
-                       rels = repalloc(rels, maxrels * sizeof(*rels));
-               }
-               rels[nrels++] = r;
-       }
-
-       hash_seq_init(&status, RelationIdCache);
-       while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
-               AssertPendingSyncConsistency(idhentry->reldesc);
-
-       for (i = 0; i < nrels; i++)
-               RelationClose(rels[i]);
-       PopActiveSnapshot();
-}
-#endif
-
  /*
   * AtEOXact_RelationCache
   *
@@ -2806,8 +2688,6 @@ AtEOXact_RelationCache(bool isCommit)
  static void
  AtEOXact_cleanup(Relation relation, bool isCommit)
  {
-       bool            clear_relcache = false;
-
         /*
          * The relcache entry's ref count should be back to its normal
          * not-in-a-transaction state: 0 unless it's nailed in cache.
@@ -2833,31 +2713,17 @@ AtEOXact_cleanup(Relation relation, bool isCommit)
  #endif
  
         /*
-        * Is the relation live after this transaction ends?
+        * Is it a relation created in the current transaction?
          *
-        * During commit, clear the relcache entry if it is preserved after
-        * relation drop, in order not to orphan the entry.  During rollback,
-        * clear the relcache entry if the relation is created in the current
-        * transaction since it isn't interesting any longer once we are out of
-        * the transaction.
-        */
-       clear_relcache =
-               (isCommit ?
-                relation->rd_droppedSubid != InvalidSubTransactionId :
-                relation->rd_createSubid != InvalidSubTransactionId);
-
-       /*
-        * Since we are now out of the transaction, reset the subids to zero.
-        * That also lets RelationClearRelation() drop the relcache entry.
+        * During commit, reset the flag to zero, since we are now out of the
+        * creating transaction.  During abort, simply delete the relcache entry
+        * --- it isn't interesting any longer.
          */
-       relation->rd_createSubid = InvalidSubTransactionId;
-       relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-       relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-       relation->rd_droppedSubid = InvalidSubTransactionId;
-
-       if (clear_relcache)
+       if (relation->rd_createSubid != InvalidSubTransactionId)
         {
-               if (RelationHasReferenceCountZero(relation))
+               if (isCommit)
+                       relation->rd_createSubid = InvalidSubTransactionId;
+               else if (RelationHasReferenceCountZero(relation))
                 {
                         RelationClearRelation(relation, false);
                         return;
@@ -2872,11 +2738,17 @@ AtEOXact_cleanup(Relation relation, bool isCommit)
                          * eventually.  This must be just a WARNING to avoid
                          * error-during-error-recovery loops.
                          */
+                       relation->rd_createSubid = InvalidSubTransactionId;
                         elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
                                  RelationGetRelationName(relation));
                 }
         }
  
+       /*
+        * Likewise, reset the hint about the relfilenode being new.
+        */
+       relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
+
         /*
          * Flush any temporary index list.
          */
@@ -2951,28 +2823,15 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
         /*
          * Is it a relation created in the current subtransaction?
          *
-        * During subcommit, mark it as belonging to the parent, instead, as long
-        * as it has not been dropped. Otherwise simply delete the relcache entry.
-        * --- it isn't interesting any longer.
+        * During subcommit, mark it as belonging to the parent, instead. During
+        * subabort, simply delete the relcache entry.
          */
         if (relation->rd_createSubid == mySubid)
         {
-               /*
-                * Valid rd_droppedSubid means the corresponding relation is dropped
-                * but the relcache entry is preserved for at-commit pending sync. We
-                * need to drop it explicitly here not to make the entry orphan.
-                */
-               Assert(relation->rd_droppedSubid == mySubid ||
-                          relation->rd_droppedSubid == InvalidSubTransactionId);
-               if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId)
+               if (isCommit)
                         relation->rd_createSubid = parentSubid;
                 else if (RelationHasReferenceCountZero(relation))
                 {
-                       /* allow the entry to be removed */
-                       relation->rd_createSubid = InvalidSubTransactionId;
-                       relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-                       relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-                       relation->rd_droppedSubid = InvalidSubTransactionId;
                         RelationClearRelation(relation, false);
                         return;
                 }
@@ -2992,8 +2851,7 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
         }
  
         /*
-        * Likewise, update or drop any new-relfilenode-in-subtransaction record
-        * or drop record.
+        * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
          */
         if (relation->rd_newRelfilenodeSubid == mySubid)
         {
@@ -3003,22 +2861,6 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
                         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
         }
  
-       if (relation->rd_firstRelfilenodeSubid == mySubid)
-       {
-               if (isCommit)
-                       relation->rd_firstRelfilenodeSubid = parentSubid;
-               else
-                       relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-       }
-
-       if (relation->rd_droppedSubid == mySubid)
-       {
-               if (isCommit)
-                       relation->rd_droppedSubid = parentSubid;
-               else
-                       relation->rd_droppedSubid = InvalidSubTransactionId;
-       }
-
         /*
          * Flush any temporary index list.
          */
@@ -3118,7 +2960,6 @@ RelationBuildLocalRelation(const char *relname,
         /* it's being created in this transaction */
         rel->rd_createSubid = GetCurrentSubTransactionId();
         rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-       rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
  
         /*
          * create a new tuple descriptor from the one passed in.  We do this
@@ -3389,29 +3230,14 @@ RelationSetNewRelfilenode(Relation relation, char persistence,
          */
         CommandCounterIncrement();
  
-       RelationAssumeNewRelfilenode(relation);
-}
-
-/*
- * RelationAssumeNewRelfilenode
- *
- * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call
- * this.  The call shall precede any code that might insert WAL records whose
- * replay would modify bytes in the new RelFileNode, and the call shall follow
- * any WAL modifying bytes in the prior RelFileNode.  See struct RelationData.
- * Ideally, call this as near as possible to the CommandCounterIncrement()
- * that makes the pg_class change visible (before it or after it); that
- * minimizes the chance of future development adding a forbidden WAL insertion
- * between RelationAssumeNewRelfilenode() and CommandCounterIncrement().
- */
-void
-RelationAssumeNewRelfilenode(Relation relation)
-{
+       /*
+        * Mark the rel as having been given a new relfilenode in the current
+        * (sub) transaction.  This is a hint that can be used to optimize later
+        * operations on the rel in the same transaction.
+        */
         relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
-       if (relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)
-               relation->rd_firstRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
  
-       /* Flag relation as needing eoxact cleanup (to clear these fields) */
+       /* Flag relation as needing eoxact cleanup (to remove the hint) */
         EOXactListAdd(relation);
  }
  
@@ -5267,8 +5093,6 @@ load_relcache_init_file(bool shared)
                 rel->rd_idattr = NULL;
                 rel->rd_createSubid = InvalidSubTransactionId;
                 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-               rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
-               rel->rd_droppedSubid = InvalidSubTransactionId;
                 rel->rd_amcache = NULL;
                 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
  
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c

index 9b211e8ada05273c58ce61164810b8b6ec0510ed..569439fec758bd35d17dc94a59786b25ece0e992 100644 (file)
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -32,7 +32,6 @@
  #include "access/twophase.h"
  #include "access/xact.h"
  #include "catalog/namespace.h"
-#include "catalog/storage.h"
  #include "commands/async.h"
  #include "commands/prepare.h"
  #include "commands/vacuum.h"
@@ -2242,17 +2241,6 @@ static struct config_int ConfigureNamesInt[] =
                 NULL, NULL, NULL
         },
  
-       {
-               {"wal_skip_threshold", PGC_USERSET, WAL_SETTINGS,
-                       gettext_noop("Size of new file to fsync instead of writing WAL."),
-                       NULL,
-                       GUC_UNIT_KB
-               },
-               &wal_skip_threshold,
-               2048, 0, MAX_KILOBYTES,
-               NULL, NULL, NULL
-       },
-
         {
                 /* see max_connections */
                 {"max_wal_senders", PGC_POSTMASTER, REPLICATION_SENDING,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample

index 0e83fd7e702f0ef498983f9bdaebe4ea17bc03b3..c7b1721b3352798f54900d9011e2c3dffc7d76b6 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -188,7 +188,6 @@
  #wal_buffers = -1                      # min 32kB, -1 sets based on shared_buffers
                                         # (change requires restart)
  #wal_writer_delay = 200ms              # 1-10000 milliseconds
-#wal_skip_threshold = 2MB
  
  #commit_delay = 0                      # range 0-100000, in microseconds
  #commit_siblings = 5                   # range 1-1000
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h

index a5f2557e069e516255bc5a7304be9b9b51b7d9ad..35cbfcb282f056036fb584dfbefa913868b697e5 100644 (file)
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -180,7 +180,6 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
   /* #define XLOG_GIST_INSERT_COMPLETE   0x40 */        /* not used anymore */
  #define XLOG_GIST_CREATE_INDEX         0x50
   /* #define XLOG_GIST_PAGE_DELETE               0x60 */        /* not used anymore */
-#define XLOG_GIST_ASSIGN_LSN           0x70    /* nop, assign new LSN */
  
  /*
   * Backup Blk 0: updated page.
@@ -465,8 +464,6 @@ extern XLogRecPtr gistXLogSplit(RelFileNode node,
                           BlockNumber origrlink, GistNSN oldnsn,
                           Buffer leftchild, bool markfollowright);
  
-extern XLogRecPtr gistXLogAssignLSN(void);
-
  /* gistget.c */
  extern Datum gistgettuple(PG_FUNCTION_ARGS);
  extern Datum gistgetbitmap(PG_FUNCTION_ARGS);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h

index 8a14693c28ad8c260cd6b110221e18a6ddd25cee..6290ac056d4fc08106dec4764cc2654958a9cfc7 100644 (file)
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -25,6 +25,7 @@
  
  
  /* "options" flag bits for heap_insert */
+#define HEAP_INSERT_SKIP_WAL   0x0001
  #define HEAP_INSERT_SKIP_FSM   0x0002
  #define HEAP_INSERT_FROZEN             0x0004
  #define HEAP_INSERT_SPECULATIVE 0x0008
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h

index 748866802e7ff3893145537aa86c6d98d5bea535..91f4051d75d4f81f7d6aec32cb571ee3f0bc39ef 100644 (file)
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -23,7 +23,7 @@ typedef struct RewriteStateData *RewriteState;
  
  extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
                                    TransactionId OldestXmin, TransactionId FreezeXid,
-                                  MultiXactId MultiXactCutoff);
+                                  MultiXactId MultiXactCutoff, bool use_wal);
  extern void end_heap_rewrite(RewriteState state);
  extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
                                    HeapTuple newTuple);
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h

index 5a7f8e7bea0461cfba5359c2b1bc7269aa157943..e40f7f174bc4a02c9df1e5f1ae7a3a2e49963872 100644 (file)
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -18,22 +18,16 @@
  #include "storage/relfilenode.h"
  #include "utils/relcache.h"
  
-/* GUC variables */
-extern int     wal_skip_threshold;
-
  extern void RelationCreateStorage(RelFileNode rnode, char relpersistence);
  extern void RelationDropStorage(Relation rel);
  extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit);
-extern void RelationPreTruncate(Relation rel);
  extern void RelationTruncate(Relation rel, BlockNumber nblocks);
-extern bool RelFileNodeSkippingWAL(RelFileNode rnode);
  
  /*
   * These functions used to be in storage/smgr/smgr.c, which explains the
   * naming
   */
  extern void smgrDoPendingDeletes(bool isCommit);
-extern void smgrDoPendingSyncs(bool isCommit);
  extern int     smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
  extern void AtSubCommit_smgr(void);
  extern void AtSubAbort_smgr(void);
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 954c7a547527b298b9340e26eaf5def4fff0c26d..03865e71651423235d8a12394060b8b0b4175d55 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2433,9 +2433,6 @@ typedef struct IndexStmt
         bool            transformed;    /* true when transformIndexStmt is finished */
         bool            concurrent;             /* should this be a concurrent index build? */
         bool            if_not_exists;  /* just do nothing if index already exists? */
-       SubTransactionId oldCreateSubid;        /* rd_createSubid of oldNode */
-       SubTransactionId oldFirstRelfilenodeSubid;      /* rd_firstRelfilenodeSubid of
-                                                                                                * oldNode */
  } IndexStmt;
  
  /* ----------------------
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h

index c9c425d4639ba532d088d5124e56c134b2d61e08..175339b4c50188a2ff58e537934b3ed1a36e0e1f 100644 (file)
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -45,9 +45,6 @@ typedef enum
                                                                  * replay; otherwise same as RBM_NORMAL */
  } ReadBufferMode;
  
-/* forward declared, to avoid including smgr.h here */
-struct SMgrRelationData;
-
  /* in globals.c ... this duplicates miscadmin.h */
  extern PGDLLIMPORT int NBuffers;
  
@@ -174,7 +171,6 @@ extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
                                                                 ForkNumber forkNum);
  extern void FlushOneBuffer(Buffer buffer);
  extern void FlushRelationBuffers(Relation rel);
-extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
  extern void FlushDatabaseBuffers(Oid dbid);
  extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
                                            ForkNumber forkNum, BlockNumber firstDelBlock);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h

index ac39b1140bc4c0e6710982ec2bfe3b88c2467575..c19a934b09a5046554a513a573d2160d4733d22f 100644 (file)
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -525,9 +525,6 @@ extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
  extern void LockReleaseSession(LOCKMETHODID lockmethodid);
  extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
  extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);
-#ifdef USE_ASSERT_CHECKING
-extern HTAB *GetLockMethodLocalHash(void);
-#endif
  extern bool LockHasWaiters(const LOCKTAG *locktag,
                            LOCKMODE lockmode, bool sessionLock);
  extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h

index 1010f5ea107f6d9c4a67e93351b656762ed364ae..35c78ef6f25900d1c3f8206845059d703970cd2a 100644 (file)
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -87,7 +87,6 @@ extern void smgrcloseall(void);
  extern void smgrclosenode(RelFileNodeBackend rnode);
  extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
  extern void smgrdounlink(SMgrRelation reln, bool isRedo);
-extern void smgrdosyncall(SMgrRelation *rels, int nrels);
  extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
  extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
  extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h

index 99e6351c037b99a35d15ef77f62185e6ce002bce..ca6c0a5adfa60ac54e76819e8f2b9e06f373bed4 100644 (file)
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -15,7 +15,6 @@
  #define REL_H
  
  #include "access/tupdesc.h"
-#include "access/xlog.h"
  #include "catalog/pg_am.h"
  #include "catalog/pg_class.h"
  #include "catalog/pg_index.h"
@@ -83,43 +82,25 @@ typedef struct RelationData
  
         /*----------
          * rd_createSubid is the ID of the highest subtransaction the rel has
-        * survived into or zero if the rel or its rd_node was created before the
-        * current top transaction.  (IndexStmt.oldNode leads to the case of a new
-        * rel with an old rd_node.)  rd_firstRelfilenodeSubid is the ID of the
-        * highest subtransaction an rd_node change has survived into or zero if
-        * rd_node matches the value it had at the start of the current top
-        * transaction.  (Rolling back the subtransaction that
-        * rd_firstRelfilenodeSubid denotes would restore rd_node to the value it
-        * had at the start of the current top transaction.  Rolling back any
-        * lower subtransaction would not.)  Their accuracy is critical to
-        * RelationNeedsWAL().
-        *
-        * rd_newRelfilenodeSubid is the ID of the highest subtransaction the
-        * most-recent relfilenode change has survived into or zero if not changed
-        * in the current transaction (or we have forgotten changing it).  This
-        * field is accurate when non-zero, but it can be zero when a relation has
-        * multiple new relfilenodes within a single transaction, with one of them
-        * occurring in a subsequently aborted subtransaction, e.g.
+        * survived into; or zero if the rel was not created in the current top
+        * transaction.  This can now be relied on, whereas previously it could
+        * be "forgotten" in earlier releases. Likewise, rd_newRelfilenodeSubid is
+        * the ID of the highest subtransaction the relfilenode change has
+        * survived into, or zero if not changed in the current transaction (or we
+        * have forgotten changing it). rd_newRelfilenodeSubid can be forgotten
+        * when a relation has multiple new relfilenodes within a single
+        * transaction, with one of them occurring in a subsequently aborted
+        * subtransaction, e.g.
          *              BEGIN;
          *              TRUNCATE t;
          *              SAVEPOINT save;
          *              TRUNCATE t;
          *              ROLLBACK TO save;
          *              -- rd_newRelfilenodeSubid is now forgotten
-        *
-        * If every rd_*Subid field is zero, they are read-only outside
-        * relcache.c.  Files that trigger rd_node changes by updating
-        * pg_class.reltablespace and/or pg_class.relfilenode call
-        * RelationAssumeNewRelfilenode() to update rd_*Subid.
-        *
-        * rd_droppedSubid is the ID of the highest subtransaction that a drop of
-        * the rel has survived into.  In entries visible outside relcache.c, this
-        * is always zero.
          */
         SubTransactionId rd_createSubid;        /* rel was created in current xact */
-       SubTransactionId rd_newRelfilenodeSubid;        /* highest subxact changing
-                                                                                                * rd_node to current value */
-       /* see end for rd_firstRelfilenodeSubid and rd_droppedSubid */
+       SubTransactionId rd_newRelfilenodeSubid;        /* new relfilenode assigned in
+                                                                                                * current xact */
  
         Form_pg_class rd_rel;           /* RELATION tuple */
         TupleDesc       rd_att;                 /* tuple descriptor */
@@ -208,10 +189,6 @@ typedef struct RelationData
  
         /* use "struct" here to avoid needing to include pgstat.h: */
         struct PgStat_TableStatus *pgstat_info;         /* statistics collection area */
-
-       SubTransactionId rd_firstRelfilenodeSubid;      /* highest subxact changing
-                                                                                                * rd_node to any value */
-       SubTransactionId rd_droppedSubid;       /* dropped with another Subid set */
  } RelationData;
  
  /*
@@ -460,16 +437,9 @@ typedef struct ViewOptions
  /*
   * RelationNeedsWAL
   *             True if relation needs WAL.
- *
- * Returns false if wal_level = minimal and this relation is created or
- * truncated in the current transaction.  See "Skipping WAL for New
- * RelFileNode" in src/backend/access/transam/README.
- */
-#define RelationNeedsWAL(relation)                                                                             \
-       ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT &&      \
-        (XLogIsNeeded() ||                                                                                                     \
-         (relation->rd_createSubid == InvalidSubTransactionId &&                       \
-          relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)))
+ */
+#define RelationNeedsWAL(relation) \
+       ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
  
  /*
   * RelationUsesLocalBuffers
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h

index e0fb48144a670c1620d506ed9efe016eaab651fc..69f0364ec8ddbb75cfc8a505225909d48c8ea1fa 100644 (file)
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -94,11 +94,10 @@ extern Relation RelationBuildLocalRelation(const char *relname,
                                                    char relkind);
  
  /*
- * Routines to manage assignment of new relfilenode to a relation
+ * Routine to manage assignment of new relfilenode to a relation
   */
  extern void RelationSetNewRelfilenode(Relation relation, char persistence,
                                                   TransactionId freezeXid, MultiXactId minmulti);
-extern void RelationAssumeNewRelfilenode(Relation relation);
  
  /*
   * Routines for flushing/rebuilding relcache entries in various scenarios
@@ -111,11 +110,6 @@ extern void RelationCacheInvalidate(void);
  
  extern void RelationCloseSmgrByOid(Oid relationId);
  
-#ifdef USE_ASSERT_CHECKING
-extern void AssertPendingSyncs_RelationCache(void);
-#else
-#define AssertPendingSyncs_RelationCache() do {} while (0)
-#endif
  extern void AtEOXact_RelationCache(bool isCommit);
  extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
                                                   SubTransactionId parentSubid);
diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl

deleted file mode 100644 (file)

index 45e1521..0000000
--- a/src/test/recovery/t/018_wal_optimize.pl
+++ /dev/null
@@ -1,372 +0,0 @@
-# Test WAL replay when some operation has skipped WAL.
-#
-# These tests exercise code that once violated the mandate described in
-# src/backend/access/transam/README section "Skipping WAL for New
-# RelFileNode".  The tests work by committing some transactions, initiating an
-# immediate shutdown, and confirming that the expected data survives recovery.
-# For many years, individual commands made the decision to skip WAL, hence the
-# frequent appearance of COPY in these tests.
-use strict;
-use warnings;
-
-use PostgresNode;
-use TestLib;
-use Test::More tests => 34;
-
-sub check_orphan_relfilenodes
-{
-       my ($node, $test_name) = @_;
-
-       my $db_oid = $node->safe_psql('postgres',
-               "SELECT oid FROM pg_database WHERE datname = 'postgres'");
-       my $prefix               = "base/$db_oid/";
-       my $filepaths_referenced = $node->safe_psql(
-               'postgres', "
-          SELECT pg_relation_filepath(oid) FROM pg_class
-          WHERE reltablespace = 0 AND relpersistence <> 't' AND
-          pg_relation_filepath(oid) IS NOT NULL;");
-       is_deeply(
-               [
-                       sort(map { "$prefix$_" }
-                                 grep(/^[0-9]+$/, slurp_dir($node->data_dir . "/$prefix")))
-               ],
-               [ sort split /\n/, $filepaths_referenced ],
-               $test_name);
-       return;
-}
-
-# We run this same test suite for both wal_level=minimal and hot_standby.
-sub run_wal_optimize
-{
-       my $wal_level = shift;
-
-       my $node = get_new_node("node_$wal_level");
-       $node->init;
-       $node->append_conf(
-               'postgresql.conf', qq(
-wal_level = $wal_level
-max_prepared_transactions = 1
-wal_log_hints = on
-wal_skip_threshold = 0
-#wal_debug = on
-));
-       $node->start;
-
-       # Setup
-       my $tablespace_dir = $node->basedir . '/tablespace_other';
-       mkdir($tablespace_dir);
-       $tablespace_dir = TestLib::perl2host($tablespace_dir);
-       $node->safe_psql('postgres',
-               "CREATE TABLESPACE other LOCATION '$tablespace_dir';");
-
-       # Test direct truncation optimization.  No tuples.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE trunc (id serial PRIMARY KEY);
-               TRUNCATE trunc;
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       my $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
-       is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table");
-
-       # Test truncation with inserted tuples within the same transaction.
-       # Tuples inserted after the truncation should be seen.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE trunc_ins (id serial PRIMARY KEY);
-               INSERT INTO trunc_ins VALUES (DEFAULT);
-               TRUNCATE trunc_ins;
-               INSERT INTO trunc_ins VALUES (DEFAULT);
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres',
-               "SELECT count(*), min(id) FROM trunc_ins;");
-       is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT");
-
-       # Same for prepared transaction.
-       # Tuples inserted after the truncation should be seen.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE twophase (id serial PRIMARY KEY);
-               INSERT INTO twophase VALUES (DEFAULT);
-               TRUNCATE twophase;
-               INSERT INTO twophase VALUES (DEFAULT);
-               PREPARE TRANSACTION 't';
-               COMMIT PREPARED 't';");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres',
-               "SELECT count(*), min(id) FROM trunc_ins;");
-       is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE");
-
-       # Writing WAL at end of xact, instead of syncing.
-       $node->safe_psql(
-               'postgres', "
-               SET wal_skip_threshold = '1TB';
-               BEGIN;
-               CREATE TABLE noskip (id serial PRIMARY KEY);
-               INSERT INTO noskip (SELECT FROM generate_series(1, 20000) a) ;
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM noskip;");
-       is($result, qq(20000), "wal_level = $wal_level, end-of-xact WAL");
-
-       # Data file for COPY query in subsequent tests
-       my $basedir   = $node->basedir;
-       my $copy_file = "$basedir/copy_data.txt";
-       TestLib::append_to_file(
-               $copy_file, qq(20000,30000
-20001,30001
-20002,30002));
-
-       # Test truncation with inserted tuples using both INSERT and COPY.  Tuples
-       # inserted after the truncation should be seen.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE ins_trunc (id serial PRIMARY KEY, id2 int);
-               INSERT INTO ins_trunc VALUES (DEFAULT, generate_series(1,10000));
-               TRUNCATE ins_trunc;
-               INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
-               COPY ins_trunc FROM '$copy_file' DELIMITER ',';
-               INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trunc;");
-       is($result, qq(5), "wal_level = $wal_level, TRUNCATE COPY INSERT");
-
-       # Test truncation with inserted tuples using COPY.  Tuples copied after
-       # the truncation should be seen.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE trunc_copy (id serial PRIMARY KEY, id2 int);
-               INSERT INTO trunc_copy VALUES (DEFAULT, generate_series(1,3000));
-               TRUNCATE trunc_copy;
-               COPY trunc_copy FROM '$copy_file' DELIMITER ',';
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result =
-         $node->safe_psql('postgres', "SELECT count(*) FROM trunc_copy;");
-       is($result, qq(3), "wal_level = $wal_level, TRUNCATE COPY");
-
-       # Like previous test, but rollback SET TABLESPACE in a subtransaction.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE spc_abort (id serial PRIMARY KEY, id2 int);
-               INSERT INTO spc_abort VALUES (DEFAULT, generate_series(1,3000));
-               TRUNCATE spc_abort;
-               SAVEPOINT s;
-                 ALTER TABLE spc_abort SET TABLESPACE other; ROLLBACK TO s;
-               COPY spc_abort FROM '$copy_file' DELIMITER ',';
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_abort;");
-       is($result, qq(3),
-               "wal_level = $wal_level, SET TABLESPACE abort subtransaction");
-
-       # in different subtransaction patterns
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE spc_commit (id serial PRIMARY KEY, id2 int);
-               INSERT INTO spc_commit VALUES (DEFAULT, generate_series(1,3000));
-               TRUNCATE spc_commit;
-               SAVEPOINT s; ALTER TABLE spc_commit SET TABLESPACE other; RELEASE s;
-               COPY spc_commit FROM '$copy_file' DELIMITER ',';
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result =
-         $node->safe_psql('postgres', "SELECT count(*) FROM spc_commit;");
-       is($result, qq(3),
-               "wal_level = $wal_level, SET TABLESPACE commit subtransaction");
-
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE spc_nest (id serial PRIMARY KEY, id2 int);
-               INSERT INTO spc_nest VALUES (DEFAULT, generate_series(1,3000));
-               TRUNCATE spc_nest;
-               SAVEPOINT s;
-                       ALTER TABLE spc_nest SET TABLESPACE other;
-                       SAVEPOINT s2;
-                               ALTER TABLE spc_nest SET TABLESPACE pg_default;
-                       ROLLBACK TO s2;
-                       SAVEPOINT s2;
-                               ALTER TABLE spc_nest SET TABLESPACE pg_default;
-                       RELEASE s2;
-               ROLLBACK TO s;
-               COPY spc_nest FROM '$copy_file' DELIMITER ',';
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_nest;");
-       is($result, qq(3),
-               "wal_level = $wal_level, SET TABLESPACE nested subtransaction");
-
-       $node->safe_psql(
-               'postgres', "
-               CREATE TABLE spc_hint (id int);
-               INSERT INTO spc_hint VALUES (1);
-               BEGIN;
-               ALTER TABLE spc_hint SET TABLESPACE other;
-               CHECKPOINT;
-               SELECT * FROM spc_hint;  -- set hint bit
-               INSERT INTO spc_hint VALUES (2);
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_hint;");
-       is($result, qq(2), "wal_level = $wal_level, SET TABLESPACE, hint bit");
-
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE idx_hint (c int PRIMARY KEY);
-               SAVEPOINT q; INSERT INTO idx_hint VALUES (1); ROLLBACK TO q;
-               CHECKPOINT;
-               INSERT INTO idx_hint VALUES (1);  -- set index hint bit
-               INSERT INTO idx_hint VALUES (2);
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->psql('postgres',);
-       my ($ret, $stdout, $stderr) =
-         $node->psql('postgres', "INSERT INTO idx_hint VALUES (2);");
-       is($ret, qq(3), "wal_level = $wal_level, unique index LP_DEAD");
-       like(
-               $stderr,
-               qr/violates unique/,
-               "wal_level = $wal_level, unique index LP_DEAD message");
-
-       # UPDATE touches two buffers for one row.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE upd (id serial PRIMARY KEY, id2 int);
-               INSERT INTO upd (id, id2) VALUES (DEFAULT, generate_series(1,10000));
-               COPY upd FROM '$copy_file' DELIMITER ',';
-               UPDATE upd SET id2 = id2 + 1;
-               DELETE FROM upd;
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM upd;");
-       is($result, qq(0),
-               "wal_level = $wal_level, UPDATE touches two buffers for one row");
-
-       # Test consistency of COPY with INSERT for table created in the same
-       # transaction.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE ins_copy (id serial PRIMARY KEY, id2 int);
-               INSERT INTO ins_copy VALUES (DEFAULT, 1);
-               COPY ins_copy FROM '$copy_file' DELIMITER ',';
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_copy;");
-       is($result, qq(4), "wal_level = $wal_level, INSERT COPY");
-
-       # Test consistency of COPY that inserts more to the same table using
-       # triggers.  If the INSERTS from the trigger go to the same block data
-       # is copied to, and the INSERTs are WAL-logged, WAL replay will fail when
-       # it tries to replay the WAL record but the "before" image doesn't match,
-       # because not all changes were WAL-logged.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE ins_trig (id serial PRIMARY KEY, id2 text);
-               CREATE FUNCTION ins_trig_before_row_trig() RETURNS trigger
-                 LANGUAGE plpgsql as \$\$
-                 BEGIN
-                       IF new.id2 NOT LIKE 'triggered%' THEN
-                         INSERT INTO ins_trig
-                               VALUES (DEFAULT, 'triggered row before' || NEW.id2);
-                       END IF;
-                       RETURN NEW;
-                 END; \$\$;
-               CREATE FUNCTION ins_trig_after_row_trig() RETURNS trigger
-                 LANGUAGE plpgsql as \$\$
-                 BEGIN
-                       IF new.id2 NOT LIKE 'triggered%' THEN
-                         INSERT INTO ins_trig
-                               VALUES (DEFAULT, 'triggered row after' || NEW.id2);
-                       END IF;
-                       RETURN NEW;
-                 END; \$\$;
-               CREATE TRIGGER ins_trig_before_row_insert
-                 BEFORE INSERT ON ins_trig
-                 FOR EACH ROW EXECUTE PROCEDURE ins_trig_before_row_trig();
-               CREATE TRIGGER ins_trig_after_row_insert
-                 AFTER INSERT ON ins_trig
-                 FOR EACH ROW EXECUTE PROCEDURE ins_trig_after_row_trig();
-               COPY ins_trig FROM '$copy_file' DELIMITER ',';
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trig;");
-       is($result, qq(9), "wal_level = $wal_level, COPY with INSERT triggers");
-
-       # Test consistency of INSERT, COPY and TRUNCATE in same transaction block
-       # with TRUNCATE triggers.
-       $node->safe_psql(
-               'postgres', "
-               BEGIN;
-               CREATE TABLE trunc_trig (id serial PRIMARY KEY, id2 text);
-               CREATE FUNCTION trunc_trig_before_stat_trig() RETURNS trigger
-                 LANGUAGE plpgsql as \$\$
-                 BEGIN
-                       INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
-                       RETURN NULL;
-                 END; \$\$;
-               CREATE FUNCTION trunc_trig_after_stat_trig() RETURNS trigger
-                 LANGUAGE plpgsql as \$\$
-                 BEGIN
-                       INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
-                       RETURN NULL;
-                 END; \$\$;
-               CREATE TRIGGER trunc_trig_before_stat_truncate
-                 BEFORE TRUNCATE ON trunc_trig
-                 FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_before_stat_trig();
-               CREATE TRIGGER trunc_trig_after_stat_truncate
-                 AFTER TRUNCATE ON trunc_trig
-                 FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_after_stat_trig();
-               INSERT INTO trunc_trig VALUES (DEFAULT, 1);
-               TRUNCATE trunc_trig;
-               COPY trunc_trig FROM '$copy_file' DELIMITER ',';
-               COMMIT;");
-       $node->stop('immediate');
-       $node->start;
-       $result =
-         $node->safe_psql('postgres', "SELECT count(*) FROM trunc_trig;");
-       is($result, qq(4),
-               "wal_level = $wal_level, TRUNCATE COPY with TRUNCATE triggers");
-
-       # Test redo of temp table creation.
-       $node->safe_psql(
-               'postgres', "
-               CREATE TEMP TABLE temp (id serial PRIMARY KEY, id2 text);");
-       $node->stop('immediate');
-       $node->start;
-       check_orphan_relfilenodes($node,
-               "wal_level = $wal_level, no orphan relfilenode remains");
-
-       return;
-}
-
-# Run same test suite for multiple wal_level values.
-run_wal_optimize("minimal");
-run_wal_optimize("hot_standby");
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out

index e9c16ce43bda5941438d1146e855c1c4a5999665..075637ce56d86c84f1196f0bdebe3b02e1dc147d 100644 (file)
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -1986,12 +1986,6 @@ select * from another;
  (3 rows)
  
  drop table another;
--- Create an index that skips WAL, then perform a SET DATA TYPE that skips
--- rewriting the index.
-begin;
-create table skip_wal_skip_rewrite_index (c varchar(10) primary key);
-alter table skip_wal_skip_rewrite_index alter c type varchar(20);
-commit;
  -- table's row type
  create table tab1 (a int, b text);
  create table tab2 (x int, y tab1);
diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out

index d35b05d2f8ab4f98b6cab0cff58c8c07d1b3e9d7..3b141834afbcdf9ee0331fcc952c754dc92ebc7c 100644 (file)
--- a/src/test/regress/expected/create_table.out
+++ b/src/test/regress/expected/create_table.out
@@ -264,16 +264,3 @@ CREATE TABLE IF NOT EXISTS as_select1 AS EXECUTE select1;
  NOTICE:  relation "as_select1" already exists, skipping
  DROP TABLE as_select1;
  DEALLOCATE select1;
--- Verify that subtransaction rollback restores rd_createSubid.
-BEGIN;
-CREATE TABLE remember_create_subid (c int);
-SAVEPOINT q; DROP TABLE remember_create_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_create_subid;
--- Verify that subtransaction rollback restores rd_firstRelfilenodeSubid.
-CREATE TABLE remember_node_subid (c int);
-BEGIN;
-ALTER TABLE remember_node_subid ALTER c TYPE bigint;
-SAVEPOINT q; DROP TABLE remember_node_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_node_subid;
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql

index 51333613d01155c753ff864a9915e606f4e3e143..5f06a768545ded5a82830b3cdc11747152a097c4 100644 (file)
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -1334,13 +1334,6 @@ select * from another;
  
  drop table another;
  
--- Create an index that skips WAL, then perform a SET DATA TYPE that skips
--- rewriting the index.
-begin;
-create table skip_wal_skip_rewrite_index (c varchar(10) primary key);
-alter table skip_wal_skip_rewrite_index alter c type varchar(20);
-commit;
-
  -- table's row type
  create table tab1 (a int, b text);
  create table tab2 (x int, y tab1);
diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql

index 8cc3c4310a03d4945ac34dbbdb2573bd9ddd5b0b..0a0497e53b0d84480449da4df733fc887c8db309 100644 (file)
--- a/src/test/regress/sql/create_table.sql
+++ b/src/test/regress/sql/create_table.sql
@@ -273,18 +273,3 @@ SELECT * FROM as_select1;
  CREATE TABLE IF NOT EXISTS as_select1 AS EXECUTE select1;
  DROP TABLE as_select1;
  DEALLOCATE select1;
-
--- Verify that subtransaction rollback restores rd_createSubid.
-BEGIN;
-CREATE TABLE remember_create_subid (c int);
-SAVEPOINT q; DROP TABLE remember_create_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_create_subid;
-
--- Verify that subtransaction rollback restores rd_firstRelfilenodeSubid.
-CREATE TABLE remember_node_subid (c int);
-BEGIN;
-ALTER TABLE remember_node_subid ALTER c TYPE bigint;
-SAVEPOINT q; DROP TABLE remember_node_subid; ROLLBACK TO q;
-COMMIT;
-DROP TABLE remember_node_subid;
author	Noah Misch <noah@leadboat.com>
	Sun, 22 Mar 2020 16:24:09 +0000 (09:24 -0700)
committer	Noah Misch <noah@leadboat.com>
	Sun, 22 Mar 2020 16:24:15 +0000 (09:24 -0700)
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
doc/src/sgml/perform.sgml		patch \| blob \| blame \| history
src/backend/access/gist/gistbuild.c		patch \| blob \| blame \| history
src/backend/access/gist/gistutil.c		patch \| blob \| blame \| history
src/backend/access/gist/gistxlog.c		patch \| blob \| blame \| history
src/backend/access/heap/heapam.c		patch \| blob \| blame \| history
src/backend/access/heap/rewriteheap.c		patch \| blob \| blame \| history
src/backend/access/nbtree/nbtsort.c		patch \| blob \| blame \| history
src/backend/access/rmgrdesc/gistdesc.c		patch \| blob \| blame \| history
src/backend/access/transam/README		patch \| blob \| blame \| history
src/backend/access/transam/xact.c		patch \| blob \| blame \| history
src/backend/access/transam/xlogutils.c		patch \| blob \| blame \| history
src/backend/bootstrap/bootparse.y		patch \| blob \| blame \| history
src/backend/catalog/storage.c		patch \| blob \| blame \| history
src/backend/commands/cluster.c		patch \| blob \| blame \| history
src/backend/commands/copy.c		patch \| blob \| blame \| history
src/backend/commands/createas.c		patch \| blob \| blame \| history
src/backend/commands/matview.c		patch \| blob \| blame \| history
src/backend/commands/tablecmds.c		patch \| blob \| blame \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/outfuncs.c		patch \| blob \| blame \| history
src/backend/parser/gram.y		patch \| blob \| blame \| history
src/backend/parser/parse_utilcmd.c		patch \| blob \| blame \| history
src/backend/storage/buffer/bufmgr.c		patch \| blob \| blame \| history
src/backend/storage/lmgr/lock.c		patch \| blob \| blame \| history
src/backend/storage/smgr/md.c		patch \| blob \| blame \| history
src/backend/storage/smgr/smgr.c		patch \| blob \| blame \| history
src/backend/utils/cache/relcache.c		patch \| blob \| blame \| history
src/backend/utils/misc/guc.c		patch \| blob \| blame \| history
src/backend/utils/misc/postgresql.conf.sample		patch \| blob \| blame \| history
src/include/access/gist_private.h		patch \| blob \| blame \| history
src/include/access/heapam.h		patch \| blob \| blame \| history
src/include/access/rewriteheap.h		patch \| blob \| blame \| history
src/include/catalog/storage.h		patch \| blob \| blame \| history
src/include/nodes/parsenodes.h		patch \| blob \| blame \| history
src/include/storage/bufmgr.h		patch \| blob \| blame \| history
src/include/storage/lock.h		patch \| blob \| blame \| history
src/include/storage/smgr.h		patch \| blob \| blame \| history
src/include/utils/rel.h		patch \| blob \| blame \| history
src/include/utils/relcache.h		patch \| blob \| blame \| history
src/test/recovery/t/018_wal_optimize.pl	[deleted file]	patch \| blob \| blame \| history
src/test/regress/expected/alter_table.out		patch \| blob \| blame \| history
src/test/regress/expected/create_table.out		patch \| blob \| blame \| history
src/test/regress/sql/alter_table.sql		patch \| blob \| blame \| history
src/test/regress/sql/create_table.sql		patch \| blob \| blame \| history