Fix 'unexpected data beyond EOF' on replica restart

author Heikki Linnakangas <heikki.linnakangas@iki.fi>

Thu, 15 Jan 2026 18:57:12 +0000 (20:57 +0200)

committer Heikki Linnakangas <heikki.linnakangas@iki.fi>

Thu, 15 Jan 2026 18:58:05 +0000 (20:58 +0200)
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 15 Jan 2026 18:57:12 +0000 (20:57 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 15 Jan 2026 18:58:05 +0000 (20:58 +0200)
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c

index b48a6d496819a23cf59207c23b3d38f257372ea6..f0e5abb2e2211c0352e3de2d3952afd60039787f 100644 (file)
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -825,6 +825,9 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
   * functions for this relation or handled interrupts in between.  This makes
   * sure we have opened all active segments, so that truncate loop will get
   * them all!
+ *
+ * If nblocks > curnblk, the request is ignored if InRecovery is set;
+ * otherwise, an error is raised.
   */
  void
  mdtruncate(SMgrRelation reln, ForkNumber forknum,
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c

index 8892473d8588a7c453c44e410f9469245a3b0e3f..880f59fea1dc8453544b0bdc7f787c7035b8106e 100644 (file)
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -652,11 +652,20 @@ smgrtruncate2(SMgrRelation reln, ForkNumber *forknum, int nforks,
                 /*
                  * We might as well update the local smgr_cached_nblocks values. The
                  * smgr cache inval message that this function sent will cause other
-                * backends to invalidate their copies of smgr_fsm_nblocks and
-                * smgr_vm_nblocks, and these ones too at the next command boundary.
-                * But these ensure they aren't outright wrong until then.
+                * backends to invalidate their copies of smgr_cached_nblocks, and
+                * these ones too at the next command boundary. But this ensures they
+                * aren't outright wrong until then.
+                *
+                * We can have nblocks > old_nblocks when a relation was truncated
+                * multiple times, a replica applied all the truncations, and later
+                * restarts from a restartpoint located before the truncations. The
+                * relation on disk will be the size of the last truncate. When
+                * replaying the first truncate, we will have nblocks > current size.
+                * In such cases, smgr_truncate does nothing, so set the cached size
+                * to the old size rather than the requested size.
                  */
-               reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
+               reln->smgr_cached_nblocks[forknum[i]] =
+                       nblocks[i] > old_nblocks[i] ? old_nblocks[i] : nblocks[i];
         }
  }
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Thu, 15 Jan 2026 18:57:12 +0000 (20:57 +0200)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Thu, 15 Jan 2026 18:58:05 +0000 (20:58 +0200)
src/backend/storage/smgr/md.c		patch \| blob \| blame \| history
src/backend/storage/smgr/smgr.c		patch \| blob \| blame \| history