git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Improve reporting of invalid weight symbols in setweight() et al. REL_18_STABLE github/REL_18_STABLE
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 4 Jun 2026 16:24:51 +0000 (12:24 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 4 Jun 2026 16:24:51 +0000 (12:24 -0400)
This commit addresses two related issues:

tsvector_filter() assumed it could print an incorrect weight value
with %c.  This could result in an invalidly-encoded error message
if the database encoding is multibyte and the char value has its
high bit set.  Weight values that are ASCII control characters
could render illegibly too.  Fix by printing such values in octal
(\ooo), similarly to how charout() would render them.

tsvector_setweight() and tsvector_setweight_by_filter() reported
the same unrecognized-weight error condition with elog(), as though
it were an internal error.  Such a message is not translatable and
produces an unwanted XX000 SQLSTATE code; tsvector_setweight() also
reported the bad value as a decimal integer, which seems unhelpful.
Fix by refactoring so that all three functions share one copy of
the code that interprets a weight argument.

The invalid-encoding aspect seems to me (tgl) to justify
back-patching.

Author: Ewan Young <kdbase.hack@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAON2xHNaeLAUzRCXL5AmXLcXaSE_gWAVjWQRmLzc_oZ=1_Vf4Q@mail.gmail.com
Backpatch-through: 14

src/backend/utils/adt/tsvector_op.c

index ea84e1cf12382793d71727759e4d0c1333c7cb70..ebe12ed52353f04e1e078a7f4a52ef1e2facd8e9 100644 (file)
@@ -207,17 +207,10 @@ tsvector_length(PG_FUNCTION_ARGS)
        PG_RETURN_INT32(ret);
 }
 
-Datum
-tsvector_setweight(PG_FUNCTION_ARGS)
+static int
+parse_weight(char cw)
 {
-       TSVector        in = PG_GETARG_TSVECTOR(0);
-       char            cw = PG_GETARG_CHAR(1);
-       TSVector        out;
-       int                     i,
-                               j;
-       WordEntry  *entry;
-       WordEntryPos *p;
-       int                     w = 0;
+       int                     w;
 
        switch (cw)
        {
@@ -238,9 +231,32 @@ tsvector_setweight(PG_FUNCTION_ARGS)
                        w = 0;
                        break;
                default:
-                       /* internal error */
-                       elog(ERROR, "unrecognized weight: %d", cw);
+                       /* Avoid printing non-ASCII bytes, else we have encoding issues */
+                       if (cw >= ' ' && cw < 0x7f)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("unrecognized weight: \"%c\"", cw)));
+                       else                            /* use \ooo format, like charout() */
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("unrecognized weight: \"\\%03o\"",
+                                                               (unsigned char) cw)));
        }
+       return w;
+}
+
+
+Datum
+tsvector_setweight(PG_FUNCTION_ARGS)
+{
+       TSVector        in = PG_GETARG_TSVECTOR(0);
+       char            cw = PG_GETARG_CHAR(1);
+       TSVector        out;
+       int                     i,
+                               j;
+       WordEntry  *entry;
+       WordEntryPos *p;
+       int                     w = parse_weight(cw);
 
        out = (TSVector) palloc(VARSIZE(in));
        memcpy(out, in, VARSIZE(in));
@@ -285,28 +301,7 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
        Datum      *dlexemes;
        bool       *nulls;
 
-       switch (char_weight)
-       {
-               case 'A':
-               case 'a':
-                       weight = 3;
-                       break;
-               case 'B':
-               case 'b':
-                       weight = 2;
-                       break;
-               case 'C':
-               case 'c':
-                       weight = 1;
-                       break;
-               case 'D':
-               case 'd':
-                       weight = 0;
-                       break;
-               default:
-                       /* internal error */
-                       elog(ERROR, "unrecognized weight: %c", char_weight);
-       }
+       weight = parse_weight(char_weight);
 
        tsout = (TSVector) palloc(VARSIZE(tsin));
        memcpy(tsout, tsin, VARSIZE(tsin));
@@ -845,29 +840,7 @@ tsvector_filter(PG_FUNCTION_ARGS)
                                         errmsg("weight array may not contain nulls")));
 
                char_weight = DatumGetChar(dweights[i]);
-               switch (char_weight)
-               {
-                       case 'A':
-                       case 'a':
-                               mask = mask | 8;
-                               break;
-                       case 'B':
-                       case 'b':
-                               mask = mask | 4;
-                               break;
-                       case 'C':
-                       case 'c':
-                               mask = mask | 2;
-                               break;
-                       case 'D':
-                       case 'd':
-                               mask = mask | 1;
-                               break;
-                       default:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                                errmsg("unrecognized weight: \"%c\"", char_weight)));
-               }
+               mask |= 1 << parse_weight(char_weight);
        }
 
        tsout = (TSVector) palloc0(VARSIZE(tsin));