fix to expand $'...' and $"..." in certain word expansions while expanding lines...

author Chet Ramey <chet.ramey@case.edu>

Tue, 25 Jan 2022 14:33:34 +0000 (09:33 -0500)

committer Chet Ramey <chet.ramey@case.edu>

Tue, 25 Jan 2022 14:33:34 +0000 (09:33 -0500)
author Chet Ramey <chet.ramey@case.edu>
Tue, 25 Jan 2022 14:33:34 +0000 (09:33 -0500)
committer Chet Ramey <chet.ramey@case.edu>
Tue, 25 Jan 2022 14:33:34 +0000 (09:33 -0500)
diff --git a/CWRU/CWRU.chlog b/CWRU/CWRU.chlog

index 36ed873e0cd0ce0e3fa3f076bce03f143c9ba363..5982d30cab752979a634de7370af9e64afe76889 100644 (file)
--- a/CWRU/CWRU.chlog
+++ b/CWRU/CWRU.chlog
@@ -2990,4 +2990,29 @@ subst.c
         - expand_string_dollar_quote: handle single-quoted and double-quoted
           strings that might include $' and $" without attempting translation;
           do more error checking for unterminated $' and $" that leaves those
-         characters unmodified
+         characters unmodified. This is for use by readline's various line
+         expansion functions (shell_expand_line)
+
+                                  1/23
+                                  ----
+parse.y,make_cmd.c
+       - revert change that unconditionally processes $'...' and $"..." in
+         here-document bodies; there are only a couple of cases where they
+         should be processed in a double-quote environment
+
+                                  1/24
+                                  ----
+
+subst.c
+       - extract_dollar_brace_string: if we see another `${' on the rhs of
+         the operator, reset the dolbrace_state to DOLBRACE_PARAM while we
+         read this new ${...} string
+       - extract_heredoc_dolbrace_string: new function, variant of
+         extract_dollar_brace_string, to process the WORD in ${PARAM OP WORD}
+         while processing here-document data. It's complicated by the
+         requirement to add to the result string as we go along, since we
+         need to change the contents of the input string with ansi expansion
+         or locale translation.
+       - string_extract_single_quoted: take a new third argument: ALLOWESC.
+         This allows backslash to escape an embedded single quote, needed by
+         extract_heredoc_dolbrace_string to process $'...'; changed callers
diff --git a/bashline.c b/bashline.c

index 1edff1e1fc8c0a5d0f25948c1c956b0545dacd41..fc3d9a70798b48b42a1977b4cbc37115b874f7ba 100644 (file)
--- a/bashline.c
+++ b/bashline.c
@@ -2848,10 +2848,6 @@ history_and_alias_expand_line (count, ignore)
    new_line = history_expand_line_internal (rl_line_buffer);
  #endif
  
-  t = expand_string_dollar_quote (new_line ? new_line : rl_line_buffer, 0);
-  FREE (new_line);
-  new_line = t;
-
  #if defined (ALIAS)
    if (new_line)
      {
diff --git a/redir.c b/redir.c

index 2f249c229b0675aa478847b2da8ef266efa8f7f6..5266b9f46bacd0ba2fe56758be99e2da1676d3df 100644 (file)
--- a/redir.c
+++ b/redir.c
@@ -468,6 +468,7 @@ here_document_to_fd (redirectee, ri)
      {
        if (pipe (herepipe) < 0)
         {
+         /* XXX - goto use_tempfile; ? */
           r = errno;
           if (document != redirectee->word)
             free (document);
diff --git a/subst.c b/subst.c

index fc172626b924be08741964a2aae524ac98b151e4..94b2c17f50780522a8bdf990f7c63a163b1deaec 100644 (file)
--- a/subst.c
+++ b/subst.c
@@ -263,10 +263,11 @@ static int do_assignment_internal PARAMS((const WORD_DESC *, int));
  static char *string_extract_verbatim PARAMS((char *, size_t, int *, char *, int));
  static char *string_extract PARAMS((char *, int *, char *, int));
  static char *string_extract_double_quoted PARAMS((char *, int *, int));
-static inline char *string_extract_single_quoted PARAMS((char *, int *));
+static inline char *string_extract_single_quoted PARAMS((char *, int *, int));
  static inline int skip_single_quoted PARAMS((const char *, size_t, int, int));
  static int skip_double_quoted PARAMS((char *, size_t, int, int));
  static char *extract_delimited_string PARAMS((char *, int *, char *, char *, char *, int));
+static char *extract_heredoc_dolbrace_string PARAMS((char *, int *, int, int));
  static char *extract_dollar_brace_string PARAMS((char *, int *, int, int));
  static int skip_matched_pair PARAMS((const char *, int, int, int, int));
  
@@ -1090,22 +1091,38 @@ skip_double_quoted (string, slen, sind, flags)
  /* Extract the contents of STRING as if it is enclosed in single quotes.
     SINDEX, when passed in, is the offset of the character immediately
     following the opening single quote; on exit, SINDEX is left pointing after
-   the closing single quote. */
+   the closing single quote. ALLOWESC allows the single quote to be quoted by
+   a backslash; extract_heredoc_dolbrace_string uses it to process $'...'. */
  static inline char *
-string_extract_single_quoted (string, sindex)
+string_extract_single_quoted (string, sindex, allowesc)
       char *string;
       int *sindex;
+     int allowesc;
  {
    register int i;
    size_t slen;
    char *t;
+  int pass_next;
    DECLARE_MBSTATE;
  
    /* Don't need slen for ADVANCE_CHAR unless multibyte chars possible. */
    slen = (MB_CUR_MAX > 1) ? strlen (string + *sindex) + *sindex : 0;
    i = *sindex;
-  while (string[i] && string[i] != '\'')
-    ADVANCE_CHAR (string, slen, i);
+  pass_next = 0;
+  while (string[i])
+    {
+      if (pass_next)
+       {
+         pass_next = 0;
+         ADVANCE_CHAR (string, slen, i);
+         continue;
+       }
+      if (allowesc && string[i] == '\\')
+       pass_next++;
+      else if (string[i] == '\'')
+        break;
+      ADVANCE_CHAR (string, slen, i);
+    }
  
    t = substring (string, *sindex, i);
  
@@ -1162,7 +1179,7 @@ string_extract_verbatim (string, slen, sindex, charlist, flags)
  
    if ((flags & SX_NOCTLESC) && charlist[0] == '\'' && charlist[1] == '\0')
      {
-      temp = string_extract_single_quoted (string, sindex);
+      temp = string_extract_single_quoted (string, sindex, 0);
        --*sindex;       /* leave *sindex at separator character */
        return temp;
      }
@@ -1501,6 +1518,265 @@ extract_delimited_string (string, sindex, opener, alt_opener, closer, flags)
    return (result);
  }
  
+/* A simplified version of extract_dollar_brace_string that exists to handle
+   $'...' and $"..." quoting in here-documents, since the here-document read
+   path doesn't. It's separate because we don't want to mess with the fast
+   common path. We already know we're going to allocate and return a new
+   string and quoted == Q_HERE_DOCUMENT. We might be able to cut it down
+   some more, but extracting strings and adding them as we go adds complexity. */
+static char *
+extract_heredoc_dolbrace_string (string, sindex, quoted, flags)
+     char *string;
+     int *sindex, quoted, flags;
+{
+  register int i, c;
+  size_t slen, tlen, result_index, result_size;
+  int pass_character, nesting_level, si, dolbrace_state;
+  char *result, *t, *send;
+  DECLARE_MBSTATE;
+
+  pass_character = 0;
+  nesting_level = 1;
+  slen = strlen (string + *sindex) + *sindex;
+  send = string + slen;
+
+  result_size = slen;
+  result_index = 0;
+  result = xmalloc (result_size + 1);
+
+  /* This function isn't called if this condition is not true initially. */
+  dolbrace_state = DOLBRACE_QUOTE;
+
+  i = *sindex;
+  while (c = string[i])
+    {
+      if (pass_character)
+       {
+         pass_character = 0;
+         RESIZE_MALLOCED_BUFFER (result, result_index, locale_mb_cur_max + 1, result_size, 64);
+         COPY_CHAR_I (result, result_index, string, send, i);
+         continue;
+       }
+
+      /* CTLESCs and backslashes quote the next character. */
+      if (c == CTLESC || c == '\\')
+       {
+         pass_character++;
+         RESIZE_MALLOCED_BUFFER (result, result_index, 2, result_size, 64);
+         result[result_index++] = c;
+         i++;
+         continue;
+       }
+
+      /* The entire reason we have this separate function right here. */
+      if (c == '$' && string[i+1] == '\'')
+       {
+         char *ttrans;
+         int ttranslen;
+
+         si = i + 2;
+         t = string_extract_single_quoted (string, &si, 1);    /* XXX */
+         CHECK_STRING_OVERRUN (i, si, slen, c);
+
+         tlen = si - i - 2;    /* -2 since si is one after the close quote */
+         ttrans = ansiexpand (t, 0, tlen, &ttranslen);
+         free (t);
+
+         /* needed to correctly quote any embedded single quotes. */
+         t = sh_single_quote (ttrans);
+         tlen = strlen (t);
+         free (ttrans);
+
+         RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 1, result_size, 64);
+         strncpy (result + result_index, t, tlen);
+         result_index += tlen;
+         free (t);
+         i = si;
+         continue;
+       }
+
+#if defined (TRANSLATABLE_STRINGS)
+      if (c == '$' && string[i+1] == '"')
+       {
+         char *ttrans;
+         int ttranslen;
+
+         si = i + 2;
+         t = string_extract_double_quoted (string, &si, flags);        /* XXX */
+         CHECK_STRING_OVERRUN (i, si, slen, c);
+
+         tlen = si - i - 2;    /* -2 since si is one after the close quote */
+         ttrans = locale_expand (t, 0, tlen, line_number, &ttranslen);
+         free (t);
+
+         t = singlequote_translations ? sh_single_quote (ttrans) : sh_mkdoublequoted (ttrans, ttranslen, 0);
+         tlen = strlen (t);
+         free (ttrans);
+
+         RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 1, result_size, 64);
+         strncpy (result + result_index, t, tlen);
+         result_index += tlen;
+         free (t);
+         i = si;
+         continue;
+       }
+#endif /* TRANSLATABLE_STRINGS */
+
+      if (c == '$' && string[i+1] == LBRACE)
+       {
+         nesting_level++;
+         RESIZE_MALLOCED_BUFFER (result, result_index, 3, result_size, 64);
+         result[result_index++] = c;
+         result[result_index++] = string[i+1];
+         i += 2;
+         if (dolbrace_state == DOLBRACE_QUOTE || dolbrace_state == DOLBRACE_WORD)
+           dolbrace_state = DOLBRACE_PARAM;
+         continue;
+       }
+
+      if (c == RBRACE)
+       {
+         nesting_level--;
+         if (nesting_level == 0)
+           break;
+         RESIZE_MALLOCED_BUFFER (result, result_index, 2, result_size, 64);
+         result[result_index++] = c;
+         i++;
+         continue;
+       }
+
+      /* Pass the contents of old-style command substitutions through
+        verbatim. */
+      if (c == '`')
+       {
+         si = i + 1;
+         t = string_extract (string, &si, "`", flags); /* already know (flags & SX_NOALLOC) == 0) */
+         CHECK_STRING_OVERRUN (i, si, slen, c);
+
+         tlen = si - i - 1;
+         RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 3, result_size, 64);
+         result[result_index++] = c;
+         strncpy (result + result_index, t, tlen);
+         result_index += tlen;
+         result[result_index++] = string[si];
+         free (t);
+         i = si + 1;
+         continue;
+       }
+
+      /* Pass the contents of new-style command substitutions and
+        arithmetic substitutions through verbatim. */
+      if (string[i] == '$' && string[i+1] == LPAREN)
+       {
+         si = i + 2;
+         t = extract_command_subst (string, &si, flags);
+         CHECK_STRING_OVERRUN (i, si, slen, c);
+
+         tlen = si - i - 1;
+         RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 4, result_size, 64);
+         result[result_index++] = c;
+         result[result_index++] = LPAREN;
+         strncpy (result + result_index, t, tlen);
+         result_index += tlen;
+         result[result_index++] = string[si];
+         free (t);
+         i = si + 1;
+         continue;
+       }
+
+#if defined (PROCESS_SUBSTITUTION)
+      /* Technically this should only work at the start of a word */
+      if ((string[i] == '<' || string[i] == '>') && string[i+1] == LPAREN)
+       {
+         si = i + 2;
+         t = extract_process_subst (string, (string[i] == '<' ? "<(" : ">)"), &si, flags);
+         CHECK_STRING_OVERRUN (i, si, slen, c);
+
+         tlen = si - i - 1;
+         RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 4, result_size, 64);
+         result[result_index++] = c;
+         result[result_index++] = LPAREN;
+         strncpy (result + result_index, t, tlen);
+         result_index += tlen;
+         result[result_index++] = string[si];
+         free (t);
+         i = si + 1;
+         continue;
+       }
+#endif
+
+      if (c == '\'' && posixly_correct && shell_compatibility_level > 42 && dolbrace_state != DOLBRACE_QUOTE)
+       {
+         COPY_CHAR_I (result, result_index, string, send, i);
+         continue;
+       }
+
+      /* Pass the contents of single and double-quoted strings through verbatim. */
+      if (c == '"' || c == '\'')
+       {
+         si = i + 1;
+         if (c == '"')
+           t = string_extract_double_quoted (string, &si, flags);
+         else
+           t = string_extract_single_quoted (string, &si, 0);
+         CHECK_STRING_OVERRUN (i, si, slen, c);
+
+         tlen = si - i - 2;    /* -2 since si is one after the close quote */
+         RESIZE_MALLOCED_BUFFER (result, result_index, tlen + 3, result_size, 64);
+         result[result_index++] = c;
+         strncpy (result + result_index, t, tlen);
+         result_index += tlen;
+         result[result_index++] = string[si - 1];
+         free (t);
+         i = si;
+         continue;
+       }
+
+      /* copy this character, which was not special. */
+      COPY_CHAR_I (result, result_index, string, send, i);
+
+      /* This logic must agree with parse.y:parse_matched_pair, since they
+        share the same defines. */
+      if (dolbrace_state == DOLBRACE_PARAM && c == '%' && (i - *sindex) > 1)
+       dolbrace_state = DOLBRACE_QUOTE;
+      else if (dolbrace_state == DOLBRACE_PARAM && c == '#' && (i - *sindex) > 1)
+        dolbrace_state = DOLBRACE_QUOTE;
+      else if (dolbrace_state == DOLBRACE_PARAM && c == '/' && (i - *sindex) > 1)
+        dolbrace_state = DOLBRACE_QUOTE2;      /* XXX */
+      else if (dolbrace_state == DOLBRACE_PARAM && c == '^' && (i - *sindex) > 1)
+        dolbrace_state = DOLBRACE_QUOTE;
+      else if (dolbrace_state == DOLBRACE_PARAM && c == ',' && (i - *sindex) > 1)
+        dolbrace_state = DOLBRACE_QUOTE;
+      /* This is intended to handle all of the [:]op expansions and the substring/
+        length/pattern removal/pattern substitution expansions. */
+      else if (dolbrace_state == DOLBRACE_PARAM && strchr ("#%^,~:-=?+/", c) != 0)
+       dolbrace_state = DOLBRACE_OP;
+      else if (dolbrace_state == DOLBRACE_OP && strchr ("#%^,~:-=?+/", c) == 0)
+       dolbrace_state = DOLBRACE_WORD;
+    }
+
+  if (c == 0 && nesting_level)
+    {
+      free (result);
+      if (no_longjmp_on_fatal_error == 0)
+       {                       /* { */
+         last_command_exit_value = EXECUTION_FAILURE;
+         report_error (_("bad substitution: no closing `%s' in %s"), "}", string);
+         exp_jump_to_top_level (DISCARD);
+       }
+      else
+       {
+         *sindex = i;
+         return ((char *)NULL);
+       }
+    }
+
+  *sindex = i;
+  result[result_index] = '\0';
+
+  return (result);
+}
+
  /* Extract a parameter expansion expression within ${ and } from STRING.
     Obey the Posix.2 rules for finding the ending `}': count braces while
     skipping over enclosed quoted strings and command substitutions.
@@ -1520,10 +1796,6 @@ extract_dollar_brace_string (string, sindex, quoted, flags)
    char *result, *t;
    DECLARE_MBSTATE;
  
-  pass_character = 0;
-  nesting_level = 1;
-  slen = strlen (string + *sindex) + *sindex;
-
    /* The handling of dolbrace_state needs to agree with the code in parse.y:
       parse_matched_pair().  The different initial value is to handle the
       case where this function is called to parse the word in
@@ -1532,6 +1804,13 @@ extract_dollar_brace_string (string, sindex, quoted, flags)
    if ((quoted & (Q_HERE_DOCUMENT|Q_DOUBLE_QUOTES)) && (flags & SX_POSIXEXP))
      dolbrace_state = DOLBRACE_QUOTE;
  
+  if (quoted == Q_HERE_DOCUMENT && dolbrace_state == DOLBRACE_QUOTE && (flags & SX_NOALLOC) == 0)
+    return (extract_heredoc_dolbrace_string (string, sindex, quoted, flags));
+
+  pass_character = 0;
+  nesting_level = 1;
+  slen = strlen (string + *sindex) + *sindex;
+
    i = *sindex;
    while (c = string[i])
      {
@@ -1554,6 +1833,8 @@ extract_dollar_brace_string (string, sindex, quoted, flags)
         {
           nesting_level++;
           i += 2;
+         if (dolbrace_state == DOLBRACE_QUOTE || dolbrace_state == DOLBRACE_WORD)
+           dolbrace_state = DOLBRACE_PARAM;
           continue;
         }
  
@@ -3939,6 +4220,7 @@ expand_string_dollar_quote (string, flags)
             }
           if (peekc == '\'')
             {
+             /* SX_COMPLETE is the equivalent of ALLOWESC here */
               /* We overload SX_COMPLETE below */
               news = skip_single_quoted (string, slen, ++sindex, SX_COMPLETE);
               /* Check for unclosed string and don't bother if so */
@@ -11228,7 +11510,7 @@ add_twochars:
             goto add_character;
  
           t_index = ++sindex;
-         temp = string_extract_single_quoted (string, &sindex);
+         temp = string_extract_single_quoted (string, &sindex, 0);
  
           /* If the entire STRING was surrounded by single quotes,
              then the string is wholly quoted. */
@@ -11578,7 +11860,7 @@ string_quote_removal (string, quoted)
               break;
             }
           tindex = sindex + 1;
-         temp = string_extract_single_quoted (string, &tindex);
+         temp = string_extract_single_quoted (string, &tindex, 0);
           if (temp)
             {
               strcpy (r, temp);
diff --git a/tests/posixexp.right b/tests/posixexp.right

index ffa3055bb02d9263612fd38acae07ccfd097b533..0955995c19274f9bec65f9a7c0ffef7946320926 100644 (file)
--- a/tests/posixexp.right
+++ b/tests/posixexp.right
@@ -275,7 +275,7 @@ argv[2] = <b>
  [  abc    def  ghi  jkl /  abc    def  ghi  jkl ]
  [  abc    def  ghi  jkl ]
  [  abc    def  ghi  jkl /  abc    def  ghi  jkl /  abc    def  ghi  jkl ]
-5: notOK
+5: OK
  OK
  OK
  5: $'not\ttoo\nbad'
author	Chet Ramey <chet.ramey@case.edu>
	Tue, 25 Jan 2022 14:33:34 +0000 (09:33 -0500)
committer	Chet Ramey <chet.ramey@case.edu>
	Tue, 25 Jan 2022 14:33:34 +0000 (09:33 -0500)
CWRU/CWRU.chlog		patch \| blob \| blame \| history
bashline.c		patch \| blob \| blame \| history
redir.c		patch \| blob \| blame \| history
subst.c		patch \| blob \| blame \| history
tests/posixexp.right		patch \| blob \| blame \| history