From c299f535be51179b1e0c989ad9ba4365e182ec28 Mon Sep 17 00:00:00 2001 From: Chet Ramey Date: Mon, 27 Oct 2025 10:42:10 -0400 Subject: [PATCH] speedups for character processing in a multibyte locale when expanding $'...'; cosmetic fix for select command when read returns EOF; fix for a forced interactive shell running under emacs with a pipe for stdin; fix for local nameref variables with the same name as variables in a function's temporary environment; fix for `x=y local -n x' not making `x' visible as a local variable; turn off nameref attribute for local nameref variables converted to arrays --- CWRU/CWRU.chlog | 59 ++++++++++++++++++++++++++++++++++-- Makefile.in | 4 +-- builtins/declare.def | 35 ++++++++++++++++++++-- execute_cmd.c | 71 ++++++++++++++++++++++++++------------------ input.c | 27 +++++++++++++---- input.h | 5 ++-- lib/sh/strtrans.c | 16 ++++++---- parse.y | 6 ++-- shell.c | 6 +++- support/shobj-conf | 2 +- tests/run-all | 2 +- tests/varenv.right | 12 ++++++++ tests/varenv23.sub | 54 ++++++++++++++++++++++++++++++++- variables.c | 43 +++++++++++++-------------- 14 files changed, 263 insertions(+), 79 deletions(-) diff --git a/CWRU/CWRU.chlog b/CWRU/CWRU.chlog index 17786954..831cf1c9 100644 --- a/CWRU/CWRU.chlog +++ b/CWRU/CWRU.chlog @@ -14,7 +14,7 @@ Makefile.in builtins/wait.def - wait_builtin: don't assign the variable given with -p if there are no jobs to wait for. - Report and fix from OÄuz + Report and fix from Oğuz arrayfunc.c - kvpair_assignment_p: return non-zero if argument L appears to be a @@ -12087,5 +12087,60 @@ variables.c - makunbound: if we are assigning to the variable being unset, just empty it out and set it to invisible, assuming the variable will be marked as visible after assignment - Report from Oguz back on 8/8/2025 + Report from Oğuz back on 8/8/2025 +lib/sh/strtrans.c + - ansicstr: only consume an entire multibyte character with mbrtowc + if we are in a locale where a backslash can appear as part of a + multibyte character. UTF-8 encodings don't allow this, so we can + consume a byte at a time + - ansic_quote,ansic_shouldquote: tighten check for potential start + of a multibyte character, so we call mbrtowc fewer times + Report and patch from Grisha Levit + +execute_cmd.c + - select_query: write '\n' to stderr like other `select' command output + if `read' returns failure (EOF) + Report from Stan Marsh + + 10/23 + ----- +input.c,input.h,parse.y + - getc_with_restart -> stream_getc, ungetc_with_restart -> stream_ungetc + - stream_setsize: new function to allow caller to set the amount read + by each call to read(2). + +shell.c + - set_bash_input: if we are forced interactive (-i), running under + emacs (INSIDE_EMACS=something), and the default input is a pipe + (fd_ispipe (0)), set the read size to 1 with stream_setsize + Report from Rudi Horn + +builtins/declare.def + - declare_internal: if we're making a local variable + (VARIABLE_CONTEXT != 0) and we act on a variable from a function's + temporary environment at the right context, make sure that variable + has the local attribute so commands like `local -p' will find it. + Report from Grisha Levit + - declare_internal: if we're taking a variable from the temporary + environment and making a local nameref out of it, make sure we + create a local variable and inherit the value appropriately. Fixes + `x=tempenv local -n x' not finding `x' as a local variable + +variables.c + - make_local_array_variable,make_local_assoc_variable: turn off the + nameref attribute even if the nameref variable we're converting + to an array already has a value. Modification of change from 10/20 + - find_variable_nameref: save and restore last_table_searched around + calls to internal_warning, since the gettext calls might look up + LC_ variables and change it (not used right now anyway) + - make_local_variable: if we find a variable with att_tempenv set, + make sure we actually have a temporary environment and we found + the variable somewhere other than the temporary environment passed + to declare/local -- we could have found it in a function-level + temporary environment. This fixes the `local -n x; local -A x;' bug + reported by Grisha Levit + - make_local_variable: since declare_internal now adds the local + attribute to variables from the temporary environment when + `local -n' is executed (for now), return that variable instead of + creating a new one diff --git a/Makefile.in b/Makefile.in index 5a5ea112..68557197 100644 --- a/Makefile.in +++ b/Makefile.in @@ -173,8 +173,8 @@ LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ $(LOCAL_LDFLAGS) $(CFLAGS_FOR_BUILD) ASAN_XCFLAGS = -fsanitize=address -fno-omit-frame-pointer ASAN_XLDFLAGS = -fsanitize=address -UBSAN_XCFLAGS = -fsanitize=undefined -fsanitize-recover -fstack-protector -UBSAN_XLDFLAGS = -fsanitize=undefined +UBSAN_XCFLAGS = -fsanitize=undefined -fsanitize=local-bounds -fsanitize=vptr -fsanitize-recover -fstack-protector +UBSAN_XLDFLAGS = -fsanitize=undefined -fsanitize=local-bounds -fsanitize=vptr GCOV_XCFLAGS = -fprofile-arcs -ftest-coverage GCOV_XLDFLAGS = -fprofile-arcs -ftest-coverage diff --git a/builtins/declare.def b/builtins/declare.def index 9aea47c8..10d53ba2 100644 --- a/builtins/declare.def +++ b/builtins/declare.def @@ -656,19 +656,48 @@ restart_new_var_name: #endif if (offset == 0 && (flags_on & att_nameref)) { - /* First look for refvar at current scope */ + /* First look for refvar at current scope. Since we're turning + on the nameref flag, it doesn't matter whether it's already + a nameref. */ refvar = find_variable_last_nameref (name, 1); + /* VARIABLE_CONTEXT != 0, so we are attempting to create or modify the attributes for a local variable at the same scope. If we've used a reference from a previous context to resolve VAR, we - want to throw REFVAR and VAR away and create a new local var. */ + want to throw REFVAR and VAR away and create a new local var. + If we use REFVAR at the same context, make sure the att_local + attribute is set. By the time we exit this clause, VAR points + to a local variable at the current context. */ if (refvar && refvar->context != variable_context) { refvar = 0; var = make_local_variable (name, inherit_flag); } else if (refvar && refvar->context == variable_context) - var = refvar; + { + SHELL_VAR *tv; + + /* We handle the difference between a variable found in + the function's temporary environment and the temporary + environment passed to declare/local. */ + /* handle case of something like `x=value local -n x' -- we + want to make sure we create a local variable */ + tv = find_tempenv_variable (name); + if (tv && tv == refvar) + { + refvar = 0; + var = make_local_variable (name, inherit_flag); + } + else + { + /* Make sure we modify a variable in a function temporary + environment to have the local attribute so it shows + up as a local variable. */ + if (tempvar_p (refvar) && variable_context) + VSETATTR (refvar, att_local); + var = refvar; + } + } /* Maybe we just want to create a new local variable */ else if ((var = find_variable (name)) == 0 || var->context != variable_context) var = make_local_variable (name, inherit_flag); diff --git a/execute_cmd.c b/execute_cmd.c index 8d4e9e67..e2e0b4f0 100644 --- a/execute_cmd.c +++ b/execute_cmd.c @@ -190,6 +190,7 @@ static int execute_coproc (COMMAND *, int, int, struct fd_bitmap *); static int execute_pipeline (COMMAND *, int, int, int, struct fd_bitmap *); +static int execute_list (COMMAND *, int, int, int, struct fd_bitmap *); static int execute_connection (COMMAND *, int, int, int, struct fd_bitmap *); static int execute_intern_function (WORD_DESC *, FUNCTION_DEF *); @@ -2841,6 +2842,45 @@ execute_pipeline (COMMAND *command, int asynchronous, int pipe_in, int pipe_out, return (exec_result); } +/* This is a placeholder for future work */ +static int +execute_list (COMMAND *command, int asynchronous, int pipe_in, int pipe_out, struct fd_bitmap *fds_to_close) +{ + int ignore_return, invert, exec_result, n; + COMMAND *first, *second; + + ignore_return = (command->flags & CMD_IGNORE_RETURN) != 0; + invert = (command->flags & CMD_INVERT_RETURN) != 0; + + interrupt_execution++; retain_fifos++; + QUIT; + + first = command->value.Connection->first; + second = command->value.Connection->second; + + if (ignore_return || invert) + { + if (first) + first->flags |= CMD_IGNORE_RETURN; + if (second) + second->flags |= CMD_IGNORE_RETURN; + } + + exec_result = execute_command (first); + + QUIT; +#if defined (JOB_CONTROL) + if (command->value.Connection->connector == ';' && job_control && interactive && posixly_correct == 0) + notify_and_cleanup (-1); +#endif + optimize_connection_fork (command); /* XXX */ + exec_result = execute_command_internal (second, asynchronous, + pipe_in, pipe_out, fds_to_close); + + interrupt_execution--; retain_fifos--; + return exec_result; +} + static int execute_connection (COMMAND *command, int asynchronous, int pipe_in, int pipe_out, struct fd_bitmap *fds_to_close) { @@ -2897,34 +2937,7 @@ execute_connection (COMMAND *command, int asynchronous, int pipe_in, int pipe_ou /* Just call execute command on both sides. */ case ';': case '\n': /* special case, happens in command substitutions */ - if (ignore_return || invert) - { - if (command->value.Connection->first) - command->value.Connection->first->flags |= CMD_IGNORE_RETURN; - if (command->value.Connection->second) - command->value.Connection->second->flags |= CMD_IGNORE_RETURN; - } - interrupt_execution++; retain_fifos++; - QUIT; - -#if 1 - execute_command (command->value.Connection->first); -#else - execute_command_internal (command->value.Connection->first, - asynchronous, pipe_in, pipe_out, - fds_to_close); -#endif - - QUIT; -#if defined (JOB_CONTROL) - if (command->value.Connection->connector == ';' && job_control && interactive && posixly_correct == 0) - notify_and_cleanup (-1); -#endif - optimize_connection_fork (command); /* XXX */ - exec_result = execute_command_internal (command->value.Connection->second, - asynchronous, pipe_in, pipe_out, - fds_to_close); - interrupt_execution--; retain_fifos--; + exec_result = execute_list (command, asynchronous, pipe_in, pipe_out, fds_to_close); break; case '|': @@ -3480,7 +3493,7 @@ select_query (WORD_LIST *list, int list_len, char *prompt, int print_menu) executing_builtin = oe; if (r != EXECUTION_SUCCESS) { - putchar ('\n'); + fputc ('\n', stderr); return ((char *)NULL); } repl_string = get_string_value ("REPLY"); diff --git a/input.c b/input.c index 9995b94f..180b7e12 100644 --- a/input.c +++ b/input.c @@ -60,16 +60,22 @@ extern int errno; extern void termsig_handler (int); /* Functions to handle reading input on systems that don't restart read(2) - if a signal is received. */ + if a signal is received. This is a very minimal buffered input system + without synchronization, intended to read from the file descriptor + associated with a stdio stream. stream_setsize(size) allows the caller/ + user to set the amount read with each call to read(2). */ -static char localbuf[1024]; -static int local_index = 0, local_bufused = 0; +#define LOCALBUF_BUFSIZE 1024 +static char localbuf[LOCALBUF_BUFSIZE]; +static size_t local_index = 0; +static ssize_t local_bufused = 0; +static size_t read_bufsize = LOCALBUF_BUFSIZE; /* Posix and USG systems do not guarantee to restart read () if it is interrupted by a signal. We do the read ourselves, and restart it if it returns EINTR. */ int -getc_with_restart (FILE *stream) +stream_getc (FILE *stream) { unsigned char uc; @@ -83,7 +89,7 @@ getc_with_restart (FILE *stream) QUIT; run_pending_traps (); - local_bufused = read (fileno (stream), localbuf, sizeof(localbuf)); + local_bufused = read (fileno (stream), localbuf, read_bufsize); if (local_bufused > 0) break; else if (local_bufused == 0) @@ -116,7 +122,7 @@ getc_with_restart (FILE *stream) } int -ungetc_with_restart (int c, FILE *stream) +stream_ungetc (int c, FILE *stream) { if (local_index == 0 || c == EOF) return EOF; @@ -124,6 +130,15 @@ ungetc_with_restart (int c, FILE *stream) return c; } +size_t +stream_setsize (size_t size) +{ + size_t o; + o = read_bufsize; + read_bufsize = size; + return o; +} + /* A facility similar to stdio, but input-only. */ #if defined (USING_BASH_MALLOC) diff --git a/input.h b/input.h index 592b6cb9..b8bfb39c 100644 --- a/input.h +++ b/input.h @@ -105,8 +105,9 @@ extern int *save_token_state (void); extern void restore_token_state (int *); /* Functions from input.c */ -extern int getc_with_restart (FILE *); -extern int ungetc_with_restart (int, FILE *); +extern int stream_getc (FILE *); +extern int stream_ungetc (int, FILE *); +extern size_t stream_setsize (size_t); /* Functions from input.c. */ extern int fd_is_bash_input (int); diff --git a/lib/sh/strtrans.c b/lib/sh/strtrans.c index 586dc9f7..37addeb8 100644 --- a/lib/sh/strtrans.c +++ b/lib/sh/strtrans.c @@ -63,7 +63,7 @@ ansicstr (const char *string, size_t len, int flags, int *sawc, size_t *rlen) if (string == 0 || *string == '\0') return ((char *)0); - mb_cur_max = MB_CUR_MAX; + mb_cur_max = locale_mb_cur_max; #if defined (HANDLE_MULTIBYTE) temp = 4*len + 4; if (temp < 12) @@ -79,8 +79,10 @@ ansicstr (const char *string, size_t len, int flags, int *sawc, size_t *rlen) { clen = 1; #if defined (HANDLE_MULTIBYTE) - if ((locale_utf8locale && (c & 0x80)) || - (locale_utf8locale == 0 && mb_cur_max > 0 && is_basic (c) == 0)) + /* We read an entire multibyte character at a time if we are in a + locale where a backslash can possibly appear as part of a + multibyte character. UTF-8 encodings prohibit this. */ + if (locale_utf8locale == 0 && mb_cur_max > 0 && is_basic (c) == 0) { clen = mbrtowc (&wc, s - 1, mb_cur_max, 0); if (MB_NULLWCH (clen)) @@ -262,9 +264,10 @@ ansic_quote (const char *str, int flags, int *rlen) break; default: #if defined (HANDLE_MULTIBYTE) - if (is_basic (c) == 0) + if ((locale_utf8locale && (c & 0x80)) || + (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0)) { - clen = mbrtowc (&wc, s, MB_CUR_MAX, &state); + clen = mbrtowc (&wc, s, locale_mb_cur_max, &state); if (MB_NULLWCH (clen)) goto quote_end; if (MB_INVALIDCH (clen)) @@ -346,7 +349,8 @@ ansic_shouldquote (const char *string) for (s = string; c = *s; s++) { #if defined (HANDLE_MULTIBYTE) - if (is_basic (c) == 0) + if ((locale_utf8locale && (c & 0x80)) || + (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0)) return (ansic_wshouldquote (s)); #endif if (ISPRINT (c) == 0) diff --git a/parse.y b/parse.y index ab042daf..97d188b9 100644 --- a/parse.y +++ b/parse.y @@ -1846,9 +1846,9 @@ yy_stream_get (void) result = EOF; if (bash_input.location.file) { - /* XXX - don't need terminate_immediately; getc_with_restart checks + /* XXX - don't need terminate_immediately; stream_getc checks for terminating signals itself if read returns < 0 */ - result = getc_with_restart (bash_input.location.file); + result = stream_getc (bash_input.location.file); } return (result); } @@ -1856,7 +1856,7 @@ yy_stream_get (void) static int yy_stream_unget (int c) { - return (ungetc_with_restart (c, bash_input.location.file)); + return (stream_ungetc (c, bash_input.location.file)); } void diff --git a/shell.c b/shell.c index e59029cc..8e595d33 100644 --- a/shell.c +++ b/shell.c @@ -1751,7 +1751,11 @@ set_bash_input (void) } } else - with_input_from_stream (default_input, dollar_vars[0]); + { + with_input_from_stream (default_input, dollar_vars[0]); + if (forced_interactive && running_under_emacs && fd_ispipe (fileno (default_input))) + stream_setsize (1); + } } /* Close the current shell script input source and forget about it. This is diff --git a/support/shobj-conf b/support/shobj-conf index c3df351a..b62d5cbf 100644 --- a/support/shobj-conf +++ b/support/shobj-conf @@ -364,7 +364,7 @@ hpux11*) SHLIB_STATUS=unsupported # If you are using the HP ANSI C compiler, you can uncomment and use - # this code from michael.osipov@siemens.com (I have not tested it) + # this code from michael.osipov@innomotics.com (I have not tested it) # SHOBJ_CFLAGS='+z' # SHOBJ_LD='$(CC)' # SHOBJ_LDFLAGS='-b -Wl,+s -Wl,+h,$@' diff --git a/tests/run-all b/tests/run-all index 184c1a6a..206e6928 100644 --- a/tests/run-all +++ b/tests/run-all @@ -90,7 +90,6 @@ do if sh $x; then passed=$(( $passed + 1 )) else - failed=$(( $failed + 1 )) echo "${CSTART}${x}: NON-ZERO exit status $?${CEND}" if [ -n "$BASH_TSTOUT_SAVEDIR" ]; then mv ${BASH_TSTOUT} ${BASH_TSTOUT_SAVEDIR}/${x}-${SUFFIX} @@ -98,6 +97,7 @@ do elif [ $failed -eq 1 ]; then echo "${CSTART}Set environment variable BASH_TSTOUT_SAVEDIR to a directory to save failed test output${CEND}" fi + failed=$(( $failed + 1 )) fi rm -f ${BASH_TSTOUT} ;; diff --git a/tests/varenv.right b/tests/varenv.right index 7bb51b6a..b8c861a7 100644 --- a/tests/varenv.right +++ b/tests/varenv.right @@ -333,6 +333,18 @@ f: 3 global: declare -rx c="3" f1: 4 global: declare -- b="8" +declare -nx var="tempenv" +declare -- var="global" +declare -x var="tempenv" +declare -- var="global" +declare -nx var="x" +declare -nx var="var" +./varenv23.sub: line 132: warning: var: circular name reference +./varenv23.sub: line 132: warning: var: circular name reference +./varenv23.sub: line 132: warning: var: removing nameref attribute +declare -Ax var=() +declare -x var +declare -- var="g" varenv24.sub f1: x = local f2: x = local diff --git a/tests/varenv23.sub b/tests/varenv23.sub index 03202ad4..8f82821d 100644 --- a/tests/varenv23.sub +++ b/tests/varenv23.sub @@ -92,4 +92,56 @@ unset -f f f1 # can't use c from here on - +foo() +{ + var=tempenv local -n var + local -p var +} + +var=global +var=funcenv foo +declare -p var + +unset -f foo +unset -v var + +foo() +{ + var=tempenv local var + local -p var +} + +var=global +var=funcenv foo +declare -p var + +unset -f foo +unset -v var + +foo() +{ + local -n var; local -p var +} +var=x foo +unset -f foo +unset -v var + +f() +{ + local -n var; local -p var + local -A var; local -p var + (local var=(z)) +} +var=var f + +unset -f funset -v var + +f() { typeset var=x; unset var; typeset -p var; } +var=g +var=t f +unset -f f + +f() { unset var; typeset -p var; } +var=g +var=t f +unset -f f diff --git a/variables.c b/variables.c index 543b4bf7..2d23a0f2 100644 --- a/variables.c +++ b/variables.c @@ -185,7 +185,7 @@ static int winsize_assignment; /* currently assigning to LINES or COLUMNS */ SHELL_VAR nameref_invalid_value; static SHELL_VAR nameref_maxloop_value; -static HASH_TABLE *last_table_searched; /* hash_lookup sets this */ +static HASH_TABLE *last_table_searched; /* hash_lookup sets this, not checked right now */ static VAR_CONTEXT *last_context_searched; /* Some forward declarations. */ @@ -284,6 +284,7 @@ static SHELL_VAR *new_shell_variable (const char *); static SHELL_VAR *make_new_variable (const char *, HASH_TABLE *); static SHELL_VAR *bind_variable_internal (const char *, const char *, HASH_TABLE *, int, int); +static void init_variable (SHELL_VAR *); static void init_shell_variable (SHELL_VAR *); static void dispose_variable_value (SHELL_VAR *); @@ -2013,6 +2014,7 @@ find_variable_nameref (SHELL_VAR *v) int level, flags; char *newname; SHELL_VAR *orig, *oldv; + HASH_TABLE *savelast; level = 0; orig = v; @@ -2032,7 +2034,9 @@ find_variable_nameref (SHELL_VAR *v) v = find_variable_internal (newname, flags); if (v == orig || v == oldv) { + savelast = last_table_searched; internal_warning (_("%s: circular name reference"), orig->name); + last_table_searched = savelast; #if 1 /* XXX - provisional change - circular refs go to global scope for resolution, without namerefs. */ @@ -2609,10 +2613,14 @@ make_local_variable (const char *name, int flags) (which results in duplicate names in the same VAR_CONTEXT->table */ /* We can't just test tmpvar_p because variables in the temporary env given to a shell function appear in the function's local variable VAR_CONTEXT - but retain their tempvar attribute. We want temporary variables that are - found in temporary_env, hence the test for last_table_searched, which is - set in hash_lookup and only (so far) checked here. */ - if (was_tmpvar && old_var->context == variable_context && last_table_searched != temporary_env) + but retain their tempvar attribute. We want to handle temporary + variables that are not found in temporary_env, hence the test that + temporary_env exists and it's where we found the variable. + If the variable appears in the temporary environment passed to + local/declare, declare_internal will handle it. */ + /* This definitely bears rethinking. */ + if (was_tmpvar && old_var->context == variable_context && temporary_env && + (new_var = hash_lookup (name, temporary_env)) && new_var != old_var) { VUNSETATTR (old_var, att_invisible); /* XXX */ /* We still want to flag this variable as local, though, and set things @@ -2624,8 +2632,6 @@ make_local_variable (const char *name, int flags) if (vc_isfuncenv (vc) && vc->scope == variable_context) break; goto set_local_var_flags; - - return (old_var); } /* If we want to change to "inherit the old variable's value" semantics, @@ -2667,23 +2673,16 @@ make_local_variable (const char *name, int flags) if (old_var == 0) new_var = make_new_variable (name, vc->table); - else + else if (was_tmpvar && (new_var = hash_lookup (name, vc->table)) && new_var == old_var) { -#if 0 /* This handles the case where a variable is found in both the temporary environment *and* declared as a local variable. If we want to avoid multiple entries with the same name in VC->table (that might mess up - unset), we need to use the existing variable entry and destroy the - current value. Currently disabled because it doesn't matter -- the - right things happen. */ - new_var = 0; - if (was_tmpvar && (new_var = hash_lookup (name, vc->table))) - { - dispose_variable_value (new_var); - init_variable (new_var); - } - if (new_var == 0) -#endif + unset), we need to use the existing variable entry. declare_internal + will do the remaining work. */ + } + else + { new_var = make_new_variable (name, vc->table); /* If we found this variable in one of the temporary environments, @@ -2825,7 +2824,7 @@ make_local_array_variable (const char *name, int flags) return var; /* array variables cannot be namerefs */ - if (var && nameref_p (var) && invisible_p (var)) + if (var && nameref_p (var) /* && invisible_p (var)*/) { internal_warning (_("%s: removing nameref attribute"), name); VUNSETATTR (var, att_nameref); @@ -2888,7 +2887,7 @@ make_local_assoc_variable (const char *name, int flags) return var; /* assoc variables cannot be namerefs */ - if (var && nameref_p (var) && invisible_p (var)) + if (var && nameref_p (var) /*&& invisible_p (var)*/) { internal_warning (_("%s: removing nameref attribute"), name); VUNSETATTR (var, att_nameref); -- 2.47.3