The FK fast-path batching added in
b7b27eb41a5 wrote the incoming row
into the batch array before checking whether the array was full:
fpentry->batch[fpentry->batch_count] = ExecCopySlotHeapTuple(newslot);
fpentry->batch_count++;
if (fpentry->batch_count >= RI_FASTPATH_BATCH_SIZE)
ri_FastPathBatchFlush(fpentry, fk_rel, riinfo);
batch_count is reset to zero only at the end of ri_FastPathBatchFlush(),
so it remains at RI_FASTPATH_BATCH_SIZE throughout a full-batch flush.
A flush runs user-defined cast functions and equality operators; if that
user code performs DML on the same FK table, ri_FastPathBatchAdd()
re-enters with batch_count == RI_FASTPATH_BATCH_SIZE and writes one past
the end of the array, corrupting the adjacent batch_count field. This
is reachable by an unprivileged table owner via an implicit cast with a
PL/pgSQL function and causes a SIGSEGV in assert-enabled builds.
Fix by bounds-checking the write into the batch array so a re-entrant
add can never write past the end, and by adding a "flushing" flag to
RI_FastPathEntry that routes re-entrant ri_FastPathBatchAdd() calls on
a busy entry to the per-row path (ri_FastPathCheck) instead of touching
the mid-flush batch array. The flag is set around the probe in
ri_FastPathBatchFlush() and cleared in a PG_FINALLY, which also resets
batch_count, so the entry is left empty and reusable if a flush error
(including a reported FK violation) is caught by a savepoint.
Add regression tests for both the re-entrant flush and reuse of an entry
after a flush error caught by a savepoint.
Reported-by: Nikolay Samokhvalov <nik@postgres.ai>
Reviewed-by: Nikolay Samokhvalov <nik@postgres.ai>
Reviewed-by: Ayush Tiwari <ayushtiwari.slg01@gmail.com>
Reviewed-by: Junwang Zhao <zhjwpku@gmail.com>
Discussion: https://postgr.es/m/CAM527d9exRCdWrhJOnAxk_vACg7sr_yPoaJp_+uCFY0qP8v=aw@mail.gmail.com
*/
HeapTuple batch[RI_FASTPATH_BATCH_SIZE];
int batch_count;
+
+ /*
+ * true while this entry's batch is being flushed; guards against
+ * re-entrant ri_FastPathBatchAdd from user code run during the flush.
+ */
+ bool flushing;
} RI_FastPathEntry;
/*
Relation fk_rel, TupleTableSlot *newslot)
{
RI_FastPathEntry *fpentry = ri_FastPathGetEntry(riinfo, fk_rel);
- MemoryContext oldcxt;
- oldcxt = MemoryContextSwitchTo(fpentry->flush_cxt);
- fpentry->batch[fpentry->batch_count] =
- ExecCopySlotHeapTuple(newslot);
- fpentry->batch_count++;
- MemoryContextSwitchTo(oldcxt);
+ /*
+ * If this entry is already being flushed, a cast function or an operator
+ * invoked during the flush has re-entered with DML on the same FK. Fall
+ * back to the per-row path rather than touching the batch array, which is
+ * mid-flush.
+ */
+ if (unlikely(fpentry->flushing))
+ {
+ ri_FastPathCheck(riinfo, fk_rel, newslot);
+ return;
+ }
+
+ /*
+ * Buffer the row. A full batch is flushed below and re-entry is handled
+ * above, so there is always room here; the bounds check just guards the
+ * array write.
+ */
+ if (fpentry->batch_count < RI_FASTPATH_BATCH_SIZE)
+ {
+ MemoryContext oldcxt = MemoryContextSwitchTo(fpentry->flush_cxt);
- if (fpentry->batch_count >= RI_FASTPATH_BATCH_SIZE)
+ fpentry->batch[fpentry->batch_count] =
+ ExecCopySlotHeapTuple(newslot);
+ fpentry->batch_count++;
+ MemoryContextSwitchTo(oldcxt);
+ }
+ else
+ elog(ERROR, "RI fast-path batch unexpectedly full");
+
+ /* Flush as soon as the batch is full. */
+ if (fpentry->batch_count == RI_FASTPATH_BATCH_SIZE)
ri_FastPathBatchFlush(fpentry, fk_rel, riinfo);
}
}
Assert(riinfo->fpmeta);
- /* Skip array overhead for single-row batches. */
- if (riinfo->nkeys == 1 && fpentry->batch_count > 1)
- violation_index = ri_FastPathFlushArray(fpentry, fk_slot, riinfo,
- fk_rel, snapshot, scandesc);
- else
- violation_index = ri_FastPathFlushLoop(fpentry, fk_slot, riinfo,
- fk_rel, snapshot, scandesc);
+ /*
+ * The probe runs user-defined cast and equality functions. Set the
+ * flushing flag around it so a re-entrant ri_FastPathBatchAdd on this
+ * entry takes the per-row path, and clear it even on error so the entry
+ * is reusable if the error is caught by a savepoint.
+ */
+ Assert(!fpentry->flushing);
+ fpentry->flushing = true;
+ PG_TRY();
+ {
+ /* Skip array overhead for single-row batches. */
+ if (riinfo->nkeys == 1 && fpentry->batch_count > 1)
+ violation_index = ri_FastPathFlushArray(fpentry, fk_slot, riinfo,
+ fk_rel, snapshot, scandesc);
+ else
+ violation_index = ri_FastPathFlushLoop(fpentry, fk_slot, riinfo,
+ fk_rel, snapshot, scandesc);
+ }
+ PG_FINALLY();
+ {
+ fpentry->flushing = false;
+ fpentry->batch_count = 0;
+ }
+ PG_END_TRY();
SetUserIdAndSecContext(saved_userid, saved_sec_context);
UnregisterSnapshot(snapshot);
MemoryContextReset(fpentry->flush_cxt);
MemoryContextSwitchTo(oldcxt);
-
- /* Reset. */
- fpentry->batch_count = 0;
}
/*
RegisterAfterTriggerBatchCallback(ri_FastPathEndBatch, NULL);
ri_fastpath_callback_registered = true;
}
+
+ entry->flushing = false;
+ entry->batch_count = 0;
}
return entry;
CREATE TABLE fp_fk_dup (a int REFERENCES fp_pk_dup);
INSERT INTO fp_fk_dup SELECT 1 FROM generate_series(1, 100);
DROP TABLE fp_fk_dup, fp_pk_dup;
+-- Re-entrant FK fast-path: DML on the same FK table from a cast function
+-- during a full-batch flush must not corrupt the batch array.
+CREATE TABLE fp_reentry_pk (id int PRIMARY KEY);
+INSERT INTO fp_reentry_pk VALUES (1), (2);
+CREATE TYPE fp_vch AS (v int);
+CREATE FUNCTION fp_vcast(fp_vch) RETURNS int LANGUAGE plpgsql AS $$
+BEGIN
+ IF $1.v = 1 THEN
+ INSERT INTO fp_reentry_fk VALUES (row(2)::fp_vch);
+ END IF;
+ RETURN $1.v;
+END$$;
+CREATE CAST (fp_vch AS int) WITH FUNCTION fp_vcast(fp_vch) AS IMPLICIT;
+CREATE TABLE fp_reentry_fk (a fp_vch
+ REFERENCES fp_reentry_pk (id));
+-- Fill exactly one batch so the flush fires; the cast re-enters with DML
+-- on the same FK and must take the per-row path.
+INSERT INTO fp_reentry_fk SELECT row(1)::fp_vch FROM generate_series(1, 64);
+SELECT a, count(*) FROM fp_reentry_fk GROUP BY a ORDER BY a;
+ a | count
+-----+-------
+ (1) | 64
+ (2) | 64
+(2 rows)
+
+DROP TABLE fp_reentry_fk, fp_reentry_pk;
+DROP CAST (fp_vch AS int);
+DROP FUNCTION fp_vcast(fp_vch);
+DROP TYPE fp_vch;
+-- Flush error caught by a savepoint must leave the entry empty and reusable.
+CREATE TABLE fp_reentry_pk2 (id int PRIMARY KEY);
+INSERT INTO fp_reentry_pk2 VALUES (1);
+CREATE TABLE fp_reentry_fk2 (a int REFERENCES fp_reentry_pk2 (id));
+DO $$
+BEGIN
+ -- A batch containing a violating row; the flush reports the violation.
+ BEGIN
+ INSERT INTO fp_reentry_fk2 SELECT CASE WHEN g = 32 THEN 999 ELSE 1 END
+ FROM generate_series(1, 64) g;
+ EXCEPTION WHEN foreign_key_violation THEN
+ RAISE NOTICE 'caught fk violation';
+ END;
+
+ -- Reuse the same FK with a full batch in the same transaction. The
+ -- entry must be empty after the caught violation: no stale rows from the
+ -- rolled-back batch (in particular no 999), and no array overflow.
+ INSERT INTO fp_reentry_fk2 SELECT 1 FROM generate_series(1, 64);
+END$$;
+NOTICE: caught fk violation
+SELECT count(*), max(a) FROM fp_reentry_fk2; -- 64 rows, max 1
+ count | max
+-------+-----
+ 64 | 1
+(1 row)
+
+DROP TABLE fp_reentry_fk2, fp_reentry_pk2;
CREATE TABLE fp_fk_dup (a int REFERENCES fp_pk_dup);
INSERT INTO fp_fk_dup SELECT 1 FROM generate_series(1, 100);
DROP TABLE fp_fk_dup, fp_pk_dup;
+
+-- Re-entrant FK fast-path: DML on the same FK table from a cast function
+-- during a full-batch flush must not corrupt the batch array.
+CREATE TABLE fp_reentry_pk (id int PRIMARY KEY);
+INSERT INTO fp_reentry_pk VALUES (1), (2);
+CREATE TYPE fp_vch AS (v int);
+CREATE FUNCTION fp_vcast(fp_vch) RETURNS int LANGUAGE plpgsql AS $$
+BEGIN
+ IF $1.v = 1 THEN
+ INSERT INTO fp_reentry_fk VALUES (row(2)::fp_vch);
+ END IF;
+ RETURN $1.v;
+END$$;
+CREATE CAST (fp_vch AS int) WITH FUNCTION fp_vcast(fp_vch) AS IMPLICIT;
+CREATE TABLE fp_reentry_fk (a fp_vch
+ REFERENCES fp_reentry_pk (id));
+-- Fill exactly one batch so the flush fires; the cast re-enters with DML
+-- on the same FK and must take the per-row path.
+INSERT INTO fp_reentry_fk SELECT row(1)::fp_vch FROM generate_series(1, 64);
+SELECT a, count(*) FROM fp_reentry_fk GROUP BY a ORDER BY a;
+DROP TABLE fp_reentry_fk, fp_reentry_pk;
+DROP CAST (fp_vch AS int);
+DROP FUNCTION fp_vcast(fp_vch);
+DROP TYPE fp_vch;
+
+-- Flush error caught by a savepoint must leave the entry empty and reusable.
+CREATE TABLE fp_reentry_pk2 (id int PRIMARY KEY);
+INSERT INTO fp_reentry_pk2 VALUES (1);
+CREATE TABLE fp_reentry_fk2 (a int REFERENCES fp_reentry_pk2 (id));
+DO $$
+BEGIN
+ -- A batch containing a violating row; the flush reports the violation.
+ BEGIN
+ INSERT INTO fp_reentry_fk2 SELECT CASE WHEN g = 32 THEN 999 ELSE 1 END
+ FROM generate_series(1, 64) g;
+ EXCEPTION WHEN foreign_key_violation THEN
+ RAISE NOTICE 'caught fk violation';
+ END;
+
+ -- Reuse the same FK with a full batch in the same transaction. The
+ -- entry must be empty after the caught violation: no stale rows from the
+ -- rolled-back batch (in particular no 999), and no array overflow.
+ INSERT INTO fp_reentry_fk2 SELECT 1 FROM generate_series(1, 64);
+END$$;
+SELECT count(*), max(a) FROM fp_reentry_fk2; -- 64 rows, max 1
+DROP TABLE fp_reentry_fk2, fp_reentry_pk2;