From ce6bf3cd17d1f96c9c40d2915ef25580fbbc8be7 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 4 Jun 2026 12:24:51 -0400
Subject: [PATCH] Improve reporting of invalid weight symbols in setweight() et al.

This commit addresses two related issues:

tsvector_filter() assumed it could print an incorrect weight value
with %c.  This could result in an invalidly-encoded error message if
the database encoding is multibyte and the char value has its high bit
set.  Weight values that are ASCII control characters could render
illegibly too.  Fix by printing such values in octal (\ooo), similarly
to how charout() would render them.

tsvector_setweight() and tsvector_setweight_by_filter() reported the
same unrecognized-weight error condition with elog(), as though it
were an internal error.  That'd not translate, would produce an
unwanted XX000 SQLSTATE code, and also reported the bad value as a
decimal integer, which seems unhelpful.  Fix by refactoring so that all
three functions share one copy of the code that interprets a weight
argument.

The invalid-encoding aspect seems to me (tgl) to justify back-patching.

Author: Ewan Young Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAON2xHNaeLAUzRCXL5AmXLcXaSE_gWAVjWQRmLzc_oZ=1_Vf4Q@mail.gmail.com Backpatch-through: 14 --- src/backend/utils/adt/tsvector_op.c | 87 ++++++++++------------------- 1 file changed, 30 insertions(+), 57 deletions(-) diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index d8dece42b9b..53a9541e89f 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -207,17 +207,10 @@ tsvector_length(PG_FUNCTION_ARGS) PG_RETURN_INT32(ret); } -Datum -tsvector_setweight(PG_FUNCTION_ARGS) +static int +parse_weight(char cw) { - TSVector in = PG_GETARG_TSVECTOR(0); - char cw = PG_GETARG_CHAR(1); - TSVector out; - int i, - j; - WordEntry *entry; - WordEntryPos *p; - int w = 0; + int w; switch (cw) { @@ -238,9 +231,32 @@ tsvector_setweight(PG_FUNCTION_ARGS) w = 0; break; default: - /* internal error */ - elog(ERROR, "unrecognized weight: %d", cw); + /* Avoid printing non-ASCII bytes, else we have encoding issues */ + if (cw >= ' ' && cw < 0x7f) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized weight: \"%c\"", cw))); + else /* use \ooo format, like charout() */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized weight: \"\\%03o\"", + (unsigned char) cw))); } + return w; +} + + +Datum +tsvector_setweight(PG_FUNCTION_ARGS) +{ + TSVector in = PG_GETARG_TSVECTOR(0); + char cw = PG_GETARG_CHAR(1); + TSVector out; + int i, + j; + WordEntry *entry; + WordEntryPos *p; + int w = parse_weight(cw); out = (TSVector) palloc(VARSIZE(in)); memcpy(out, in, VARSIZE(in)); @@ -285,28 +301,7 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS) Datum *dlexemes; bool *nulls; - switch (char_weight) - { - case 'A': - case 'a': - weight = 3; - break; - case 'B': - case 'b': - weight = 2; - break; - case 'C': - case 'c': - weight = 1; - break; - case 'D': - case 'd': - weight = 0; - break; - default: - /* 
internal error */ - elog(ERROR, "unrecognized weight: %c", char_weight); - } + weight = parse_weight(char_weight); tsout = (TSVector) palloc(VARSIZE(tsin)); memcpy(tsout, tsin, VARSIZE(tsin)); @@ -846,29 +841,7 @@ tsvector_filter(PG_FUNCTION_ARGS) errmsg("weight array may not contain nulls"))); char_weight = DatumGetChar(dweights[i]); - switch (char_weight) - { - case 'A': - case 'a': - mask = mask | 8; - break; - case 'B': - case 'b': - mask = mask | 4; - break; - case 'C': - case 'c': - mask = mask | 2; - break; - case 'D': - case 'd': - mask = mask | 1; - break; - default: - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized weight: \"%c\"", char_weight))); - } + mask |= 1 << parse_weight(char_weight); } tsout = (TSVector) palloc0(VARSIZE(tsin)); -- 2.47.3