git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Speed up byteain by not parsing traditional-style input twice.
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 18 Jul 2025 20:42:02 +0000 (16:42 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 18 Jul 2025 20:42:10 +0000 (16:42 -0400)
Instead of laboriously computing the exact output length, use strlen
to get an upper bound cheaply.  (This is still O(N) of course, but
the constant factor is a lot less.)  This will typically result in
overallocating the output datum, but that's of little concern since
it's a short-lived allocation in just about all use-cases.

A simple microbenchmark showed about 40% speedup for long input
strings.

While here, make some cosmetic cleanups and add a test case that
covers the double-backslash code path in byteain and byteaout.

Author: Steven Niu <niushiji@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Stepan Neretin <slpmcf@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/ca315729-140b-426e-81a6-6cd5cfe7ecc5@gmail.com

src/backend/utils/adt/bytea.c
src/test/regress/expected/strings.out
src/test/regress/sql/strings.sql

index 2e539c2504e8da823a4d8abca76e1ebd74693b5b..6e7b914c56395fa4d941d3e8c4b43c9534703904 100644 (file)
@@ -182,27 +182,21 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
  *
  *             Non-printable characters must be passed as '\nnn' (octal) and are
  *             converted to internal form.  '\' must be passed as '\\'.
- *             ereport(ERROR, ...) if bad form.
- *
- *             BUGS:
- *                             The input is scanned twice.
- *                             The error checking of input is minimal.
  */
 Datum
 byteain(PG_FUNCTION_ARGS)
 {
        char       *inputText = PG_GETARG_CSTRING(0);
        Node       *escontext = fcinfo->context;
+       size_t          len = strlen(inputText);
+       size_t          bc;
        char       *tp;
        char       *rp;
-       int                     bc;
        bytea      *result;
 
        /* Recognize hex input */
        if (inputText[0] == '\\' && inputText[1] == 'x')
        {
-               size_t          len = strlen(inputText);
-
                bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
                result = palloc(bc);
                bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
@@ -213,33 +207,7 @@ byteain(PG_FUNCTION_ARGS)
        }
 
        /* Else, it's the traditional escaped style */
-       for (bc = 0, tp = inputText; *tp != '\0'; bc++)
-       {
-               if (tp[0] != '\\')
-                       tp++;
-               else if ((tp[0] == '\\') &&
-                                (tp[1] >= '0' && tp[1] <= '3') &&
-                                (tp[2] >= '0' && tp[2] <= '7') &&
-                                (tp[3] >= '0' && tp[3] <= '7'))
-                       tp += 4;
-               else if ((tp[0] == '\\') &&
-                                (tp[1] == '\\'))
-                       tp += 2;
-               else
-               {
-                       /*
-                        * one backslash, not followed by another or ### valid octal
-                        */
-                       ereturn(escontext, (Datum) 0,
-                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                        errmsg("invalid input syntax for type %s", "bytea")));
-               }
-       }
-
-       bc += VARHDRSZ;
-
-       result = (bytea *) palloc(bc);
-       SET_VARSIZE(result, bc);
+       result = (bytea *) palloc(len + VARHDRSZ);      /* maximum possible length */
 
        tp = inputText;
        rp = VARDATA(result);
@@ -247,21 +215,21 @@ byteain(PG_FUNCTION_ARGS)
        {
                if (tp[0] != '\\')
                        *rp++ = *tp++;
-               else if ((tp[0] == '\\') &&
-                                (tp[1] >= '0' && tp[1] <= '3') &&
+               else if ((tp[1] >= '0' && tp[1] <= '3') &&
                                 (tp[2] >= '0' && tp[2] <= '7') &&
                                 (tp[3] >= '0' && tp[3] <= '7'))
                {
-                       bc = VAL(tp[1]);
-                       bc <<= 3;
-                       bc += VAL(tp[2]);
-                       bc <<= 3;
-                       *rp++ = bc + VAL(tp[3]);
+                       int                     v;
+
+                       v = VAL(tp[1]);
+                       v <<= 3;
+                       v += VAL(tp[2]);
+                       v <<= 3;
+                       *rp++ = v + VAL(tp[3]);
 
                        tp += 4;
                }
-               else if ((tp[0] == '\\') &&
-                                (tp[1] == '\\'))
+               else if (tp[1] == '\\')
                {
                        *rp++ = '\\';
                        tp += 2;
@@ -269,7 +237,7 @@ byteain(PG_FUNCTION_ARGS)
                else
                {
                        /*
-                        * We should never get here. The first pass should not allow it.
+                        * one backslash, not followed by another or ### valid octal
                         */
                        ereturn(escontext, (Datum) 0,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
@@ -277,6 +245,9 @@ byteain(PG_FUNCTION_ARGS)
                }
        }
 
+       bc = rp - VARDATA(result);      /* actual length */
+       SET_VARSIZE(result, bc + VARHDRSZ);
+
        PG_RETURN_BYTEA_P(result);
 }
 
index 788844abd20e39a6b1a0f67ef13f7fb043733b35..1bfd33de3f3c38b1be9934f993e6a11d8e358f9d 100644 (file)
@@ -236,6 +236,12 @@ SELECT E'De\\678dBeEf'::bytea;
 ERROR:  invalid input syntax for type bytea
 LINE 1: SELECT E'De\\678dBeEf'::bytea;
                ^
+SELECT E'DeAd\\\\BeEf'::bytea;
+        bytea         
+----------------------
+ \x446541645c42654566
+(1 row)
+
 SELECT reverse(''::bytea);
  reverse 
 ---------
@@ -291,6 +297,12 @@ SELECT E'De\\123dBeEf'::bytea;
  DeSdBeEf
 (1 row)
 
+SELECT E'DeAd\\\\BeEf'::bytea;
+   bytea    
+------------
+ DeAd\\BeEf
+(1 row)
+
 -- Test non-error-throwing API too
 SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
  pg_input_is_valid 
index 2577a42987de751a06d4843569baf5744b898e43..92c445c24396123355b065d514ef4ac39cc47719 100644 (file)
@@ -76,6 +76,7 @@ SELECT E'De\\000dBeEf'::bytea;
 SELECT E'De\123dBeEf'::bytea;
 SELECT E'De\\123dBeEf'::bytea;
 SELECT E'De\\678dBeEf'::bytea;
+SELECT E'DeAd\\\\BeEf'::bytea;
 
 SELECT reverse(''::bytea);
 SELECT reverse('\xaa'::bytea);
@@ -88,6 +89,7 @@ SELECT E'\\xDe00BeEf'::bytea;
 SELECT E'DeAdBeEf'::bytea;
 SELECT E'De\\000dBeEf'::bytea;
 SELECT E'De\\123dBeEf'::bytea;
+SELECT E'DeAd\\\\BeEf'::bytea;
 
 -- Test non-error-throwing API too
 SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');