From: Michael Paquier Date: Tue, 14 Apr 2026 20:09:05 +0000 (+0900) Subject: Add tests for low-level PGLZ [de]compression routines X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=67d318e70402b0eb450c7e20c0378b88e5a6df1d;p=thirdparty%2Fpostgresql.git Add tests for low-level PGLZ [de]compression routines The goal of this module is to provide an entry point for the coverage of the low-level compression and decompression PGLZ routines. The new test is moved to a new parallel group, with all the existing compression-related tests added to it. This includes tests for the cases detected by fuzzing that emulate corrupted compressed data, as fixed by 2b5ba2a0a141: - Set control bit with read of a match tag, where no data follows. - Set control bit with read of a match tag, where 1 byte follows. - Set control bit with match tag where length nibble is 3 bytes (extended case). While on it, some tests are added for compress/decompress roundtrips, and for check_complete=false/true. Like 2b5ba2a0a141, backpatch to all the stable branches. Discussion: https://postgr.es/m/adw647wuGjh1oU6p@paquier.xyz Backpatch-through: 14 --- diff --git a/src/test/regress/expected/compression_pglz.out b/src/test/regress/expected/compression_pglz.out new file mode 100644 index 00000000000..0ef49d42506 --- /dev/null +++ b/src/test/regress/expected/compression_pglz.out @@ -0,0 +1,65 @@ +-- +-- Tests for PGLZ compression +-- +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' :dlsuffix +CREATE FUNCTION test_pglz_compress(bytea) + RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_pglz_decompress(bytea, int4, bool) + RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; +-- Round-trip with pglz: compress then decompress. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, false) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; + roundtrip_ok +-------------- + t +(1 row) + +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, true) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; + roundtrip_ok +-------------- + t +(1 row) + +-- Decompression with rawsize too large, fails to fill the destination +-- buffer. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 500, true); +ERROR: pglz_decompress failed +-- Decompression with rawsize too small, fails with source not fully +-- consumed. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 100, true); +ERROR: pglz_decompress failed +-- Corrupted compressed data. Set control bit with read of a match tag, +-- no data follows. +SELECT length(test_pglz_decompress('\x01'::bytea, 1024, false)) AS ctrl_only_len; + ctrl_only_len +--------------- + 0 +(1 row) + +SELECT test_pglz_decompress('\x01'::bytea, 1024, true); +ERROR: pglz_decompress failed +-- Corrupted compressed data. Set control bit with read of a match tag, +-- 1 byte follows. +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, false); +ERROR: pglz_decompress failed +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, true); +ERROR: pglz_decompress failed +-- Corrupted compressed data. Set control bit with match tag where length +-- nibble is 3 bytes (extended length), no data follows. +SELECT test_pglz_decompress('\x010f01'::bytea, 1024, false); +ERROR: pglz_decompress failed +SELECT test_pglz_decompress('\x010f01'::bytea, 1024, true); +ERROR: pglz_decompress failed +-- Clean up +DROP FUNCTION test_pglz_compress; +DROP FUNCTION test_pglz_decompress; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index cc365393bb7..288e94dc408 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -123,7 +123,12 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls planner_est +test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain memoize stats predicate numa eager_aggregate graph_table_rls planner_est + +# ---------- +# Another group of parallel tests (compression) +# ---------- +test: compression compression_lz4 compression_pglz # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 5c19f70b6e8..2bcb5559a45 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -27,6 +27,7 @@ #include "catalog/pg_type.h" #include "commands/sequence.h" #include "commands/trigger.h" +#include "common/pg_lzcompress.h" #include "executor/executor.h" #include "executor/functions.h" #include "executor/spi.h" @@ -1422,3 +1423,68 @@ test_instr_time(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } + +/* + * test_pglz_compress + * + * Compress the input using pglz_compress(). Only the "always" strategy is + * currently supported. + * + * Returns the compressed data, or NULL if compression fails. + */ +PG_FUNCTION_INFO_V1(test_pglz_compress); +Datum +test_pglz_compress(PG_FUNCTION_ARGS) +{ + bytea *input = PG_GETARG_BYTEA_PP(0); + char *source = VARDATA_ANY(input); + int32 slen = VARSIZE_ANY_EXHDR(input); + int32 maxout = PGLZ_MAX_OUTPUT(slen); + bytea *result; + int32 clen; + + result = (bytea *) palloc(maxout + VARHDRSZ); + clen = pglz_compress(source, slen, VARDATA(result), + PGLZ_strategy_always); + if (clen < 0) + PG_RETURN_NULL(); + + SET_VARSIZE(result, clen + VARHDRSZ); + PG_RETURN_BYTEA_P(result); +} + +/* + * test_pglz_decompress + * + * Decompress the input using pglz_decompress(). + * + * The second argument is the expected uncompressed data size. The third + * argument is here for the check_complete flag. + * + * Returns the decompressed data, or raises an error if decompression fails. + */ +PG_FUNCTION_INFO_V1(test_pglz_decompress); +Datum +test_pglz_decompress(PG_FUNCTION_ARGS) +{ + bytea *input = PG_GETARG_BYTEA_PP(0); + int32 rawsize = PG_GETARG_INT32(1); + bool check_complete = PG_GETARG_BOOL(2); + char *source = VARDATA_ANY(input); + int32 slen = VARSIZE_ANY_EXHDR(input); + bytea *result; + int32 dlen; + + if (rawsize < 0) + elog(ERROR, "rawsize must not be negative"); + + result = (bytea *) palloc(rawsize + VARHDRSZ); + + dlen = pglz_decompress(source, slen, VARDATA(result), + rawsize, check_complete); + if (dlen < 0) + elog(ERROR, "pglz_decompress failed"); + + SET_VARSIZE(result, dlen + VARHDRSZ); + PG_RETURN_BYTEA_P(result); +} diff --git a/src/test/regress/sql/compression_pglz.sql b/src/test/regress/sql/compression_pglz.sql new file mode 100644 index 00000000000..a44af02afb7 --- /dev/null +++ b/src/test/regress/sql/compression_pglz.sql @@ -0,0 +1,53 @@ +-- +-- Tests for PGLZ compression +-- + +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX + +\set regresslib :libdir '/regress' :dlsuffix + +CREATE FUNCTION test_pglz_compress(bytea) + RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_pglz_decompress(bytea, int4, bool) + RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; + +-- Round-trip with pglz: compress then decompress. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, false) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, true) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; + +-- Decompression with rawsize too large, fails to fill the destination +-- buffer. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 500, true); + +-- Decompression with rawsize too small, fails with source not fully +-- consumed. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 100, true); + +-- Corrupted compressed data. Set control bit with read of a match tag, +-- no data follows. +SELECT length(test_pglz_decompress('\x01'::bytea, 1024, false)) AS ctrl_only_len; +SELECT test_pglz_decompress('\x01'::bytea, 1024, true); + +-- Corrupted compressed data. Set control bit with read of a match tag, +-- 1 byte follows. +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, false); +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, true); + +-- Corrupted compressed data. Set control bit with match tag where length +-- nibble is 3 bytes (extended length), no data follows. +SELECT test_pglz_decompress('\x010f01'::bytea, 1024, false); +SELECT test_pglz_decompress('\x010f01'::bytea, 1024, true); + +-- Clean up +DROP FUNCTION test_pglz_compress; +DROP FUNCTION test_pglz_decompress;