From: Chet Ramey <chet.ramey@case.edu>
Date: Mon, 4 Mar 2024 19:59:33 +0000 (-0500)
Subject: fix for printing case pattern lists beginning with "esac"; several fixes for expansio... 
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=54f3ed2278025081f897b9bd958fcf099fd5be18;p=thirdparty%2Fbash.git

fix for printing case pattern lists beginning with "esac"; several fixes for expansion when IFS contains DEL
---

diff --git a/CWRU/CWRU.chlog b/CWRU/CWRU.chlog
index 903af24c..bc5476b0 100644
--- a/CWRU/CWRU.chlog
+++ b/CWRU/CWRU.chlog
@@ -8750,3 +8750,30 @@ lib/readline/display.c
 	- update_line: use IS_COMBINING_CHAR instead of UNICODE_COMBINING_CHAR
 	  plus WCWIDTH; it doesn't make sense on systems where wcwidth isn't
 	  broken
+
+				   2/27
+				   ----
+print_cmd.c
+	- print_case_clauses: if one of the case command pattern lists begins
+	  with the word `esac' (unquoted), precede the pattern list with `(',
+	  since it had to be there originally to get through the parser.
+	  Report by Emanuele Torre <torreemanuele6@gmail.com>
+
+				   2/29
+				   ----
+general.c,general.h
+	- string_to_rlimtype: takes a second ENDP argument, like strtol, so
+	  the caller doesn't have to check that the string is all digits,
+	  but can optionally check for and disallow a 0x prefix
+
+				    3/2
+				    ---
+subst.c
+	- dequote_list: unset the W_QUOTED flag in the word after dequoting it
+	- parameter_brace_expand_rhs: if the word in the list returned by
+	  expand_string_for_rhs has W_QUOTED set, but the string being
+	  expanded was not quoted, turn on the W_QUOTED in the returned word
+	  so we can potentially avoid word splitting
+	- expand_word_internal: if CTLNUL is an IFS character, don't add quoted
+	  null strings to istring if we're going to be word splitting, since
+	  they will be treated as word delimiters
diff --git a/builtins/ulimit.def b/builtins/ulimit.def
index 0301089b..316d0ec3 100644
--- a/builtins/ulimit.def
+++ b/builtins/ulimit.def
@@ -94,6 +94,7 @@ $END
 #include "common.h"
 #include "bashgetopt.h"
 #include "pipesize.h"
+#include <chartypes.h>
 
 #if !defined (errno)
 extern int errno;
@@ -131,7 +132,7 @@ extern int errno;
 
 #if !defined (RLIMTYPE)
 #  define RLIMTYPE long
-#  define string_to_rlimtype(s) strtol(s, (char **)NULL, 10)
+#  define string_to_rlimtype(s, ep) strtol(s, ep, 10)
 #  define print_rlimtype(num, nl) printf ("%ld%s", num, nl ? "\n" : "")
 #endif
 
@@ -473,9 +474,16 @@ ulimit_internal (int cmd, char *cmdarg, int mode, int multiple)
     real_limit = soft_limit;
   else if (STREQ (cmdarg, "unlimited"))
     real_limit = RLIM_INFINITY;
-  else if (all_digits (cmdarg))
+  else if (DIGIT (*cmdarg) && (cmdarg[1] == 0 || DIGIT (cmdarg[1])))
     {
-      limit = string_to_rlimtype (cmdarg);
+      char *endp;
+      limit = string_to_rlimtype (cmdarg, &endp);
+      if (*endp != '\0')
+	{
+	  sh_invalidnum (cmdarg);
+	  return (EXECUTION_FAILURE);
+	}
+
       block_factor = BLOCKSIZE(limits[limind].block_factor);
       real_limit = limit * block_factor;
 
diff --git a/general.c b/general.c
index b395c54f..5c26ae38 100644
--- a/general.c
+++ b/general.c
@@ -170,15 +170,17 @@ set_posix_options (const char *bitmap)
 
 #if defined (RLIMTYPE)
 RLIMTYPE
-string_to_rlimtype (char *s)
+string_to_rlimtype (const char *string, char **ep)
 {
   RLIMTYPE ret;
   int neg;
+  const char *s;
 
   ret = 0;
   neg = 0;
+  s = string;
   /* ulimit_builtin doesn't allow leading whitespace or an optional
-     leading `+' or `-'. */
+     leading `+' or `-', so the caller has to check. */
   while (s && *s && whitespace (*s))
     s++;
   if (s && (*s == '-' || *s == '+'))
@@ -188,6 +190,8 @@ string_to_rlimtype (char *s)
     }
   for ( ; s && *s && DIGIT (*s); s++)
     ret = (ret * 10) + TODIGIT (*s);
+  if (ep)
+    *ep = (char *)s;
   return (neg ? -ret : ret);
 }
 
diff --git a/general.h b/general.h
index b86a4981..0528bbf1 100644
--- a/general.h
+++ b/general.h
@@ -297,7 +297,7 @@ extern void set_posix_options (const char *);
 extern void save_posix_options (void);
 
 #if defined (RLIMTYPE)
-extern RLIMTYPE string_to_rlimtype (char *);
+extern RLIMTYPE string_to_rlimtype (const char *, char **);
 extern void print_rlimtype (RLIMTYPE, int);
 #endif
 
diff --git a/print_cmd.c b/print_cmd.c
index 330223d3..9d3ab73b 100644
--- a/print_cmd.c
+++ b/print_cmd.c
@@ -771,6 +771,14 @@ print_case_clauses (PATTERN_LIST *clauses)
       if (printing_comsub == 0 || first == 0)
 	newline ("");
       first = 0;
+      /* "The grammar shows that reserved words can be used as patterns,
+	 even if one is the first word on a line. Obviously, the reserved
+	 word esac cannot be used in this manner." */
+      /* If the first word of the pattern list is literal "esac", the only
+	 way it could have gotten through the parser is to have been
+	 preceded by a left paren. */
+      if (STREQ (clauses->patterns->word->word, "esac"))
+	cprintf("(");
       command_print_word_list (clauses->patterns, " | ");
       cprintf (")\n");
       indentation += indentation_amount;
diff --git a/subst.c b/subst.c
index e5f7d250..2c51021b 100644
--- a/subst.c
+++ b/subst.c
@@ -252,6 +252,9 @@ static WORD_LIST *expand_string_for_pat (const char *, int, int *, int *);
 
 static char *quote_escapes_internal (const char *, int);
 
+static char *quote_ifs (const char *);
+static WORD_LIST *list_quote_ifs (WORD_LIST *);
+
 static WORD_LIST *list_quote_escapes (WORD_LIST *);
 static WORD_LIST *list_dequote_escapes (WORD_LIST *);
 
@@ -4478,6 +4481,8 @@ expand_string_for_rhs (const char *string, int quoted, int op, int pflags, int *
   /* This was further clarified on the austin-group list in March, 2017 and
      in Posix bug 1129 */
   old_nosplit = expand_no_split_dollar_star;
+  /* The check against ifs_is_null is so we don't split this time through,
+     since we will split the (possibly-quoted) results of this function. */
   expand_no_split_dollar_star = (quoted & (Q_DOUBLE_QUOTES|Q_HERE_DOCUMENT)) || op == '=' || ifs_is_null == 0;	/* XXX - was 1 */
   td.flags = W_EXPANDRHS;		/* expanding RHS of ${paramOPword} */
   td.flags |= W_NOSPLIT2;		/* no splitting, remove "" and '' */
@@ -4884,6 +4889,7 @@ dequote_list (WORD_LIST *list)
 	tlist->word->flags &= ~W_HASQUOTEDNULL;
       free (tlist->word->word);
       tlist->word->word = s;
+      tlist->word->flags &= ~W_QUOTED;		/* no longer quoted */
     }
   return list;
 }
@@ -4905,6 +4911,47 @@ remove_quoted_escapes (char *string)
   return (string);
 }
 
+/* Return a newly-allocated copy of STRING in which each character satisfying isifs() is preceded by CTLESC. Not used yet. */
+static char *
+quote_ifs (const char *string)
+{
+  const char *s, *send;
+  char *t, *result;
+  size_t slen;
+  DECLARE_MBSTATE; 
+
+  slen = strlen (string);
+  send = string + slen;
+
+  t = result = (char *)xmalloc ((slen * 2) + 1);	/* room for a CTLESC before every byte, plus the NUL */
+  s = string;
+
+  while (*s)
+    {
+      if (isifs (*s))
+	*t++ = CTLESC;		/* quote the IFS character that follows */
+      COPY_CHAR_P (t, s, send);	/* presumably copies one (possibly multibyte) character and advances both S and T -- confirm against the macro */
+    }
+  *t = '\0';
+
+  return (result);
+}
+
+static WORD_LIST *
+list_quote_ifs (WORD_LIST *list)
+{
+  WORD_LIST *w;
+  char *t;
+
+  for (w = list; w; w = w->next)	/* rewrite the string of each word in LIST in place */
+    {
+      t = w->word->word;
+      w->word->word = quote_ifs (t);	/* install the CTLESC-quoted copy */
+      free (t);				/* the word no longer references the old string */
+    }
+  return list;				/* the same list head that was passed in */
+}
+
 /* Remove quoted $IFS characters from STRING.  Quoted IFS characters are
    added to protect them from word splitting, but we need to remove them
    if no word splitting takes place.  This returns newly-allocated memory,
@@ -7986,6 +8033,8 @@ parameter_brace_expand_rhs (char *name, char *value,
 	  temp = string_list (l);
 	  if (temp && (QUOTED_NULL (temp) == 0) && (l->word->flags & W_SAWQUOTEDNULL))
 	    w->flags |= W_SAWQUOTEDNULL;	/* XXX */
+	  if (temp && (l->word->flags & W_QUOTED) && quoted == 0)
+	    w->flags |= W_QUOTED;
 	}
 
       /* If we have a quoted null result (QUOTED_NULL(temp)) and the word is
@@ -10524,6 +10573,7 @@ param_expand (char *string, size_t *sindex, int quoted,
 	  temp = string_list_dollar_star (list, quoted, 0);
 	  if (temp)
 	    {
+	      /* Q_HERE_DOCUMENT as well? */
 	      temp1 = (quoted & Q_DOUBLE_QUOTES) ? quote_string (temp) : temp;
 	      if (*temp == 0)
 		tflag |= W_HASQUOTEDNULL;
@@ -10593,11 +10643,7 @@ param_expand (char *string, size_t *sindex, int quoted,
 	      temp = string_list_dollar_at (list, quoted, 0);
 	      /* Set W_SPLITSPACE to make sure the individual positional
 		 parameters are split into separate arguments */
-#if 0
-	      if (quoted == 0 && (ifs_is_set == 0 || ifs_is_null))
-#else	/* change with bash-5.0 */
 	      if (quoted == 0 && ifs_is_null)
-#endif
 		tflag |= W_SPLITSPACE;
 	      /* If we're not quoted but we still don't want word splitting, make
 		 we quote the IFS characters to protect them from splitting (e.g.,
@@ -11437,6 +11483,12 @@ add_string:
 	  if (tword && (tword->flags & W_SAWQUOTEDNULL))
 	    had_quoted_null = 1;		/* XXX */
 
+	  /* This loses tword->flags. If quoted == 0 but (tword->flags & W_QUOTED),
+	     we need to note that somewhere. It means that the result will be
+	     split, but this portion should not be. It only really matters if
+	     $IFS contains $'\001', since usually quoting with CTLESC will
+	     inhibit the word splitting. */
+
 	  temp = tword ? tword->word : (char *)NULL;
 	  dispose_word_desc (tword);
 
@@ -11724,6 +11776,10 @@ add_twochars:
 	     will cause word splitting. */
 	  if (temp == 0 && quoted_state == PARTIALLY_QUOTED && quoted == 0 && (word->flags & (W_NOSPLIT|W_EXPANDRHS|W_ASSIGNRHS)) == W_EXPANDRHS)
 	    {
+	      /* Don't add a quoted null character if it would eventually be
+		 used as a word delimiter when splitting. */
+	      if (isifs (CTLNUL))
+		continue;
 	      c = CTLNUL;
 	      sindex--;
 	      had_quoted_null = 1;
@@ -11732,6 +11788,11 @@ add_twochars:
 	  if (temp == 0 && quoted_state == PARTIALLY_QUOTED && (word->flags & (W_NOSPLIT|W_NOSPLIT2)))
 	    continue;
 
+	  /* Throw away a quoted null instead of adding a character that
+	     would eventually be a word delimiter when splitting. */
+	  if (temp == 0 && quoted_state == PARTIALLY_QUOTED && had_quoted_null && isifs(CTLNUL))
+	    continue;
+
 	add_quoted_string:
 
 	  if (temp)