From: Chet Ramey Date: Tue, 25 Jan 2022 14:33:34 +0000 (-0500) Subject: fix to expand $'...' and $"..." in certain word expansions while expanding lines... X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6e1ab9a367ff927d1815922a24ced22e73ad965e;p=thirdparty%2Fbash.git fix to expand $'...' and $"..." in certain word expansions while expanding lines of here-document data --- diff --git a/CWRU/CWRU.chlog b/CWRU/CWRU.chlog index 36ed873e0..5982d30ca 100644 --- a/CWRU/CWRU.chlog +++ b/CWRU/CWRU.chlog @@ -2990,4 +2990,29 @@ subst.c - expand_string_dollar_quote: handle single-quoted and double-quoted strings that might include $' and $" without attempting translation; do more error checking for unterminated $' and $" that leaves those - characters unmodified + characters unmodified. This is for use by readline's various line + expansion functions (shell_expand_line) + + 1/23 + ---- +parse.y,make_cmd.c + - revert change that unconditionally processes $'...' and $"..." in + here-document bodies; there are only a couple of cases where they + should be processed in a double-quote environment + + 1/24 + ---- + +subst.c + - extract_dollar_brace_string: if we see another `${' on the rhs of + the operator, reset the dolbrace_state to DOLBRACE_PARAM while we + read this new ${...} string + - extract_heredoc_dolbrace_string: new function, variant of + extract_dollar_brace_string, to process the WORD in ${PARAM OP WORD} + while processing here-document data. It's complicated by the + requirement to add to the result string as we go along, since we + need to change the contents of the input string with ansi expansion + or locale translation. + - string_extract_single_quoted: take a new third argument: ALLOWESC. 
+ This allows backslash to escape an embedded single quote, needed by + extract_heredoc_dolbrace_string to process $'...'; changed callers diff --git a/bashline.c b/bashline.c index 1edff1e1f..fc3d9a707 100644 --- a/bashline.c +++ b/bashline.c @@ -2848,10 +2848,6 @@ history_and_alias_expand_line (count, ignore) new_line = history_expand_line_internal (rl_line_buffer); #endif - t = expand_string_dollar_quote (new_line ? new_line : rl_line_buffer, 0); - FREE (new_line); - new_line = t; - #if defined (ALIAS) if (new_line) { diff --git a/redir.c b/redir.c index 2f249c229..5266b9f46 100644 --- a/redir.c +++ b/redir.c @@ -468,6 +468,7 @@ here_document_to_fd (redirectee, ri) { if (pipe (herepipe) < 0) { + /* XXX - goto use_tempfile; ? */ r = errno; if (document != redirectee->word) free (document); diff --git a/subst.c b/subst.c index fc172626b..94b2c17f5 100644 --- a/subst.c +++ b/subst.c @@ -263,10 +263,11 @@ static int do_assignment_internal PARAMS((const WORD_DESC *, int)); static char *string_extract_verbatim PARAMS((char *, size_t, int *, char *, int)); static char *string_extract PARAMS((char *, int *, char *, int)); static char *string_extract_double_quoted PARAMS((char *, int *, int)); -static inline char *string_extract_single_quoted PARAMS((char *, int *)); +static inline char *string_extract_single_quoted PARAMS((char *, int *, int)); static inline int skip_single_quoted PARAMS((const char *, size_t, int, int)); static int skip_double_quoted PARAMS((char *, size_t, int, int)); static char *extract_delimited_string PARAMS((char *, int *, char *, char *, char *, int)); +static char *extract_heredoc_dolbrace_string PARAMS((char *, int *, int, int)); static char *extract_dollar_brace_string PARAMS((char *, int *, int, int)); static int skip_matched_pair PARAMS((const char *, int, int, int, int)); @@ -1090,22 +1091,38 @@ skip_double_quoted (string, slen, sind, flags) /* Extract the contents of STRING as if it is enclosed in single quotes. 
SINDEX, when passed in, is the offset of the character immediately following the opening single quote; on exit, SINDEX is left pointing after - the closing single quote. */ + the closing single quote. ALLOWESC allows the single quote to be quoted by + a backslash; extract_heredoc_dolbrace_string uses it to process $'...'. */ static inline char * -string_extract_single_quoted (string, sindex) +string_extract_single_quoted (string, sindex, allowesc) char *string; int *sindex; + int allowesc; { register int i; size_t slen; char *t; + int pass_next; DECLARE_MBSTATE; /* Don't need slen for ADVANCE_CHAR unless multibyte chars possible. */ slen = (MB_CUR_MAX > 1) ? strlen (string + *sindex) + *sindex : 0; i = *sindex; - while (string[i] && string[i] != '\'') - ADVANCE_CHAR (string, slen, i); + pass_next = 0; + while (string[i]) + { + if (pass_next) + { + pass_next = 0; + ADVANCE_CHAR (string, slen, i); + continue; + } + if (allowesc && string[i] == '\\') + pass_next++; + else if (string[i] == '\'') + break; + ADVANCE_CHAR (string, slen, i); + } t = substring (string, *sindex, i); @@ -1162,7 +1179,7 @@ string_extract_verbatim (string, slen, sindex, charlist, flags) if ((flags & SX_NOCTLESC) && charlist[0] == '\'' && charlist[1] == '\0') { - temp = string_extract_single_quoted (string, sindex); + temp = string_extract_single_quoted (string, sindex, 0); --*sindex; /* leave *sindex at separator character */ return temp; } @@ -1501,6 +1518,265 @@ extract_delimited_string (string, sindex, opener, alt_opener, closer, flags) return (result); } +/* A simplified version of extract_dollar_brace_string that exists to handle + $'...' and $"..." quoting in here-documents, since the here-document read + path doesn't. It's separate because we don't want to mess with the fast + common path. We already know we're going to allocate and return a new + string and quoted == Q_HERE_DOCUMENT. We might be able to cut it down + some more, but extracting strings and adding them as we go adds complexity. 
*/ +static char * +extract_heredoc_dolbrace_string (string, sindex, quoted, flags) + char *string; + int *sindex, quoted, flags; +{ + register int i, c; + size_t slen, tlen, result_index, result_size; + int pass_character, nesting_level, si, dolbrace_state; + char *result, *t, *send; + DECLARE_MBSTATE; + + pass_character = 0; + nesting_level = 1; + slen = strlen (string + *sindex) + *sindex; + send = string + slen; + + result_size = slen; + result_index = 0; + result = xmalloc (result_size + 1); + + /* This function isn't called if this condition is not true initially. */ + dolbrace_state = DOLBRACE_QUOTE; + + i = *sindex; + while (c = string[i]) + { + if (pass_character) + { + pass_character = 0; + RESIZE_MALLOCED_BUFFER (result, result_index, locale_mb_cur_max + 1, result_size, 64); + COPY_CHAR_I (result, result_index, string, send, i); + continue; + } + + /* CTLESCs and backslashes quote the next character. */ + if (c == CTLESC || c == '\\') + { + pass_character++; + RESIZE_MALLOCED_BUFFER (result, result_index, 2, result_size, 64); + result[result_index++] = c; + i++; + continue; + } + + /* The entire reason we have this separate function right here. */ + if (c == '$' && string[i+1] == '\'') + { + char *ttrans; + int ttranslen; + + si = i + 2; + t = string_extract_single_quoted (string, &si, 1); /* XXX */ + CHECK_STRING_OVERRUN (i, si, slen, c); + + tlen = si - i - 2; /* -2 since si is one after the close quote */ + ttrans = ansiexpand (t, 0, tlen, &ttranslen); + free (t); + + /* needed to correctly quote any embedded single quotes. 
*/ + t = sh_single_quote (ttrans); + tlen = strlen (t); + free (ttrans); + + RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 1, result_size, 64); + strncpy (result + result_index, t, tlen); + result_index += tlen; + free (t); + i = si; + continue; + } + +#if defined (TRANSLATABLE_STRINGS) + if (c == '$' && string[i+1] == '"') + { + char *ttrans; + int ttranslen; + + si = i + 2; + t = string_extract_double_quoted (string, &si, flags); /* XXX */ + CHECK_STRING_OVERRUN (i, si, slen, c); + + tlen = si - i - 2; /* -2 since si is one after the close quote */ + ttrans = locale_expand (t, 0, tlen, line_number, &ttranslen); + free (t); + + t = singlequote_translations ? sh_single_quote (ttrans) : sh_mkdoublequoted (ttrans, ttranslen, 0); + tlen = strlen (t); + free (ttrans); + + RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 1, result_size, 64); + strncpy (result + result_index, t, tlen); + result_index += tlen; + free (t); + i = si; + continue; + } +#endif /* TRANSLATABLE_STRINGS */ + + if (c == '$' && string[i+1] == LBRACE) + { + nesting_level++; + RESIZE_MALLOCED_BUFFER (result, result_index, 3, result_size, 64); + result[result_index++] = c; + result[result_index++] = string[i+1]; + i += 2; + if (dolbrace_state == DOLBRACE_QUOTE || dolbrace_state == DOLBRACE_WORD) + dolbrace_state = DOLBRACE_PARAM; + continue; + } + + if (c == RBRACE) + { + nesting_level--; + if (nesting_level == 0) + break; + RESIZE_MALLOCED_BUFFER (result, result_index, 2, result_size, 64); + result[result_index++] = c; + i++; + continue; + } + + /* Pass the contents of old-style command substitutions through + verbatim. 
*/ + if (c == '`') + { + si = i + 1; + t = string_extract (string, &si, "`", flags); /* already know (flags & SX_NOALLOC) == 0) */ + CHECK_STRING_OVERRUN (i, si, slen, c); + + tlen = si - i - 1; + RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 3, result_size, 64); + result[result_index++] = c; + strncpy (result + result_index, t, tlen); + result_index += tlen; + result[result_index++] = string[si]; + free (t); + i = si + 1; + continue; + } + + /* Pass the contents of new-style command substitutions and + arithmetic substitutions through verbatim. */ + if (string[i] == '$' && string[i+1] == LPAREN) + { + si = i + 2; + t = extract_command_subst (string, &si, flags); + CHECK_STRING_OVERRUN (i, si, slen, c); + + tlen = si - i - 1; + RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 4, result_size, 64); + result[result_index++] = c; + result[result_index++] = LPAREN; + strncpy (result + result_index, t, tlen); + result_index += tlen; + result[result_index++] = string[si]; + free (t); + i = si + 1; + continue; + } + +#if defined (PROCESS_SUBSTITUTION) + /* Technically this should only work at the start of a word */ + if ((string[i] == '<' || string[i] == '>') && string[i+1] == LPAREN) + { + si = i + 2; + t = extract_process_subst (string, (string[i] == '<' ? "<(" : ">)"), &si, flags); + CHECK_STRING_OVERRUN (i, si, slen, c); + + tlen = si - i - 1; + RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 4, result_size, 64); + result[result_index++] = c; + result[result_index++] = LPAREN; + strncpy (result + result_index, t, tlen); + result_index += tlen; + result[result_index++] = string[si]; + free (t); + i = si + 1; + continue; + } +#endif + + if (c == '\'' && posixly_correct && shell_compatibility_level > 42 && dolbrace_state != DOLBRACE_QUOTE) + { + COPY_CHAR_I (result, result_index, string, send, i); + continue; + } + + /* Pass the contents of single and double-quoted strings through verbatim. 
*/ + if (c == '"' || c == '\'') + { + si = i + 1; + if (c == '"') + t = string_extract_double_quoted (string, &si, flags); + else + t = string_extract_single_quoted (string, &si, 0); + CHECK_STRING_OVERRUN (i, si, slen, c); + + tlen = si - i - 2; /* -2 since si is one after the close quote */ + RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 3, result_size, 64); + result[result_index++] = c; + strncpy (result + result_index, t, tlen); + result_index += tlen; + result[result_index++] = string[si - 1]; + free (t); + i = si; + continue; + } + + /* copy this character, which was not special. */ + COPY_CHAR_I (result, result_index, string, send, i); + + /* This logic must agree with parse.y:parse_matched_pair, since they + share the same defines. */ + if (dolbrace_state == DOLBRACE_PARAM && c == '%' && (i - *sindex) > 1) + dolbrace_state = DOLBRACE_QUOTE; + else if (dolbrace_state == DOLBRACE_PARAM && c == '#' && (i - *sindex) > 1) + dolbrace_state = DOLBRACE_QUOTE; + else if (dolbrace_state == DOLBRACE_PARAM && c == '/' && (i - *sindex) > 1) + dolbrace_state = DOLBRACE_QUOTE2; /* XXX */ + else if (dolbrace_state == DOLBRACE_PARAM && c == '^' && (i - *sindex) > 1) + dolbrace_state = DOLBRACE_QUOTE; + else if (dolbrace_state == DOLBRACE_PARAM && c == ',' && (i - *sindex) > 1) + dolbrace_state = DOLBRACE_QUOTE; + /* This is intended to handle all of the [:]op expansions and the substring/ + length/pattern removal/pattern substitution expansions. 
*/ + else if (dolbrace_state == DOLBRACE_PARAM && strchr ("#%^,~:-=?+/", c) != 0) + dolbrace_state = DOLBRACE_OP; + else if (dolbrace_state == DOLBRACE_OP && strchr ("#%^,~:-=?+/", c) == 0) + dolbrace_state = DOLBRACE_WORD; + } + + if (c == 0 && nesting_level) + { + free (result); + if (no_longjmp_on_fatal_error == 0) + { /* { */ + last_command_exit_value = EXECUTION_FAILURE; + report_error (_("bad substitution: no closing `%s' in %s"), "}", string); + exp_jump_to_top_level (DISCARD); + } + else + { + *sindex = i; + return ((char *)NULL); + } + } + + *sindex = i; + result[result_index] = '\0'; + + return (result); +} + /* Extract a parameter expansion expression within ${ and } from STRING. Obey the Posix.2 rules for finding the ending `}': count braces while skipping over enclosed quoted strings and command substitutions. @@ -1520,10 +1796,6 @@ extract_dollar_brace_string (string, sindex, quoted, flags) char *result, *t; DECLARE_MBSTATE; - pass_character = 0; - nesting_level = 1; - slen = strlen (string + *sindex) + *sindex; - /* The handling of dolbrace_state needs to agree with the code in parse.y: parse_matched_pair(). 
The different initial value is to handle the case where this function is called to parse the word in @@ -1532,6 +1804,13 @@ extract_dollar_brace_string (string, sindex, quoted, flags) if ((quoted & (Q_HERE_DOCUMENT|Q_DOUBLE_QUOTES)) && (flags & SX_POSIXEXP)) dolbrace_state = DOLBRACE_QUOTE; + if (quoted == Q_HERE_DOCUMENT && dolbrace_state == DOLBRACE_QUOTE && (flags & SX_NOALLOC) == 0) + return (extract_heredoc_dolbrace_string (string, sindex, quoted, flags)); + + pass_character = 0; + nesting_level = 1; + slen = strlen (string + *sindex) + *sindex; + i = *sindex; while (c = string[i]) { @@ -1554,6 +1833,8 @@ extract_dollar_brace_string (string, sindex, quoted, flags) { nesting_level++; i += 2; + if (dolbrace_state == DOLBRACE_QUOTE || dolbrace_state == DOLBRACE_WORD) + dolbrace_state = DOLBRACE_PARAM; continue; } @@ -3939,6 +4220,7 @@ expand_string_dollar_quote (string, flags) } if (peekc == '\'') { + /* SX_COMPLETE is the equivalent of ALLOWESC here */ /* We overload SX_COMPLETE below */ news = skip_single_quoted (string, slen, ++sindex, SX_COMPLETE); /* Check for unclosed string and don't bother if so */ @@ -11228,7 +11510,7 @@ add_twochars: goto add_character; t_index = ++sindex; - temp = string_extract_single_quoted (string, &sindex); + temp = string_extract_single_quoted (string, &sindex, 0); /* If the entire STRING was surrounded by single quotes, then the string is wholly quoted. */ @@ -11578,7 +11860,7 @@ string_quote_removal (string, quoted) break; } tindex = sindex + 1; - temp = string_extract_single_quoted (string, &tindex); + temp = string_extract_single_quoted (string, &tindex, 0); if (temp) { strcpy (r, temp); diff --git a/tests/posixexp.right b/tests/posixexp.right index ffa3055bb..0955995c1 100644 --- a/tests/posixexp.right +++ b/tests/posixexp.right @@ -275,7 +275,7 @@ argv[2] = [ abc def ghi jkl / abc def ghi jkl ] [ abc def ghi jkl ] [ abc def ghi jkl / abc def ghi jkl / abc def ghi jkl ] -5: notOK +5: OK OK OK 5: $'not\ttoo\nbad'