From: Robert Haas Date: Mon, 6 Apr 2026 15:13:25 +0000 (-0400) Subject: Expose helper functions scan_quoted_identifier and scan_identifier. X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e0e819cc08d3dc6fe843779c2bb3388135d20d14;p=thirdparty%2Fpostgresql.git Expose helper functions scan_quoted_identifier and scan_identifier. Previously, this logic was embedded within SplitIdentifierString, SplitDirectoriesString, and SplitGUCList. Factoring it out saves a bit of duplicated code, and also makes it available to extensions that might want to do similar things without necessarily wanting to do exactly the same thing. Reviewed-by: Matheus Alcantara Reviewed-by: Lukas Fittl Discussion: http://postgr.es/m/CA+Tgmob-0W8306mvrJX5Urtqt1AAasu8pi4yLrZ1XfwZU-Uj1w@mail.gmail.com --- diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index ecad6d62184..c0ff51bd2fc 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -2750,6 +2750,96 @@ textToQualifiedNameList(text *textval) return result; } +/* + * scan_quoted_identifier - In-place scanner for quoted identifiers. + * + * *nextp should point to the opening double-quote character, and will be + * updated to point just past the end. *endp is set to the position of + * the closing quote. The return value is the identifier, or NULL if the + * matching close-quote cannot be found. + * + * If we find two consecutive double quote characters, that doesn't end the + * identifier: instead, we collapse them into a single double quote and include + * it in the resulting token. Note that this requires overwriting the rest of the + * string in place, including the portion beyond the final value of *nextp. 
+ */ +char * +scan_quoted_identifier(char **endp, char **nextp) +{ + char *token = *nextp + 1; + + for (;;) + { + *endp = strchr(*nextp + 1, '"'); + if (*endp == NULL) + return NULL; /* mismatched quotes */ + if ((*endp)[1] != '"') + break; /* found end of quoted identifier */ + /* Collapse adjacent quotes into one quote, and look again */ + memmove(*endp, *endp + 1, strlen(*endp)); + *nextp = *endp; + } + /* *endp now points at the terminating quote */ + *nextp = *endp + 1; + + return token; +} + +/* + * scan_identifier - In-place scanner for quoted or unquoted identifiers. + * + * On success, *endp is set to the position where the caller should write '\0' + * to null-terminate the token, and *nextp is advanced past the token (and past + * the closing quote, if any). The return value is the token content, or NULL + * if there is a syntax error (mismatched quotes or empty unquoted token). + * + * Unquoted identifiers are terminated by whitespace or the first occurrence + * of the separator character. Additionally, if downcase_unquoted = true, + * unquoted identifiers are downcased in place. See scan_quoted_identifier for + * an additional way in which we modify the string in place. + */ +char * +scan_identifier(char **endp, char **nextp, char separator, bool downcase_unquoted) +{ + char *token; + + if (**nextp == '"') + return scan_quoted_identifier(endp, nextp); + + /* Unquoted identifier --- extends to separator or whitespace */ + token = *nextp; + + while (**nextp && **nextp != separator && !scanner_isspace(**nextp)) + (*nextp)++; + + if (*nextp == token) + return NULL; /* empty token */ + + *endp = *nextp; + + if (downcase_unquoted) + { + /* + * Downcase the identifier, using same code as main lexer does. + * + * XXX because we want to overwrite the input in-place, we cannot + * support a downcasing transformation that increases the string + * length. 
This is not a problem given the current implementation of + * downcase_truncate_identifier, but we'll probably have to do + * something about this someday. + */ + int len = *endp - token; + char *downname = downcase_truncate_identifier(token, len, false); + + Assert(strlen(downname) <= len); + strncpy(token, downname, len); /* strncpy is required here */ + pfree(downname); + } + + return token; +} + + /* * SplitIdentifierString --- parse a string containing identifiers * @@ -2794,53 +2884,9 @@ SplitIdentifierString(char *rawstring, char separator, char *curname; char *endp; - if (*nextp == '"') - { - /* Quoted name --- collapse quote-quote pairs, no downcasing */ - curname = nextp + 1; - for (;;) - { - endp = strchr(nextp + 1, '"'); - if (endp == NULL) - return false; /* mismatched quotes */ - if (endp[1] != '"') - break; /* found end of quoted name */ - /* Collapse adjacent quotes into one quote, and look again */ - memmove(endp, endp + 1, strlen(endp)); - nextp = endp; - } - /* endp now points at the terminating quote */ - nextp = endp + 1; - } - else - { - /* Unquoted name --- extends to separator or whitespace */ - char *downname; - int len; - - curname = nextp; - while (*nextp && *nextp != separator && - !scanner_isspace(*nextp)) - nextp++; - endp = nextp; - if (curname == nextp) - return false; /* empty unquoted name not allowed */ - - /* - * Downcase the identifier, using same code as main lexer does. - * - * XXX because we want to overwrite the input in-place, we cannot - * support a downcasing transformation that increases the string - * length. This is not a problem given the current implementation - * of downcase_truncate_identifier, but we'll probably have to do - * something about this someday. 
- */ - len = endp - curname; - downname = downcase_truncate_identifier(curname, len, false); - Assert(strlen(downname) <= len); - strncpy(curname, downname, len); /* strncpy is required here */ - pfree(downname); - } + curname = scan_identifier(&endp, &nextp, separator, true); + if (curname == NULL) + return false; /* mismatched quotes or empty name */ while (scanner_isspace(*nextp)) nextp++; /* skip trailing whitespace */ @@ -2924,20 +2970,9 @@ SplitDirectoriesString(char *rawstring, char separator, if (*nextp == '"') { /* Quoted name --- collapse quote-quote pairs */ - curname = nextp + 1; - for (;;) - { - endp = strchr(nextp + 1, '"'); - if (endp == NULL) - return false; /* mismatched quotes */ - if (endp[1] != '"') - break; /* found end of quoted name */ - /* Collapse adjacent quotes into one quote, and look again */ - memmove(endp, endp + 1, strlen(endp)); - nextp = endp; - } - /* endp now points at the terminating quote */ - nextp = endp + 1; + curname = scan_quoted_identifier(&endp, &nextp); + if (curname == NULL) + return false; /* mismatched quotes */ } else { @@ -3042,35 +3077,9 @@ SplitGUCList(char *rawstring, char separator, char *curname; char *endp; - if (*nextp == '"') - { - /* Quoted name --- collapse quote-quote pairs */ - curname = nextp + 1; - for (;;) - { - endp = strchr(nextp + 1, '"'); - if (endp == NULL) - return false; /* mismatched quotes */ - if (endp[1] != '"') - break; /* found end of quoted name */ - /* Collapse adjacent quotes into one quote, and look again */ - memmove(endp, endp + 1, strlen(endp)); - nextp = endp; - } - /* endp now points at the terminating quote */ - nextp = endp + 1; - } - else - { - /* Unquoted name --- extends to separator or whitespace */ - curname = nextp; - while (*nextp && *nextp != separator && - !scanner_isspace(*nextp)) - nextp++; - endp = nextp; - if (curname == nextp) - return false; /* empty unquoted name not allowed */ - } + curname = scan_identifier(&endp, &nextp, separator, false); + if (curname == 
NULL) + return false; /* mismatched quotes or empty name */ while (scanner_isspace(*nextp)) nextp++; /* skip trailing whitespace */ diff --git a/src/include/utils/varlena.h b/src/include/utils/varlena.h index 4b32574a075..fe8d8a58952 100644 --- a/src/include/utils/varlena.h +++ b/src/include/utils/varlena.h @@ -27,6 +27,9 @@ extern int varstr_levenshtein_less_equal(const char *source, int slen, int ins_c, int del_c, int sub_c, int max_d, bool trusted); extern List *textToQualifiedNameList(text *textval); +extern char *scan_quoted_identifier(char **endp, char **nextp); +extern char *scan_identifier(char **endp, char **nextp, char separator, + bool downcase_unquoted); extern bool SplitIdentifierString(char *rawstring, char separator, List **namelist); extern bool SplitDirectoriesString(char *rawstring, char separator,