lib/glob/sm_loop.c
- parse_collsym: make sure to not return an out-of-bounds read if a
collating symbol is unterminated. Fixes OOB read reported by
- op7ic \x00 <op7ica@gmail.com>
+ Jerzy Kramarz <op7ica@gmail.com>
- brackmatch: after incrementing p, before checking whether it's a
character range, check whether *p was NULL before the increment
and short-circuit the bracket expression if it is
- parameter_brace_{patsub,remove_pattern,transform,casemod}: save and
restore this_command_name while temporarily setting it for use in
error messages. Fixes use-after-free error reported by
- op7ic \x00 <op7ica@gmail.com>
+ Jerzy Kramarz <op7ica@gmail.com>
- string_extract_verbatim: make sure when we increment i by 2 due to
a CTLESC or CTLESC-CTLNUL that we don't read past the end of the
string. This can happen if the string ends with an odd number of
CTLESC chars. Fixes oob-read error reported by
- op7ic \x00 <op7ica@gmail.com>
+ Jerzy Kramarz <op7ica@gmail.com>
11/11
-----
- u32cconv: even if we don't have iconv, if locale_utf8locale is non-
zero, return u32toutf8
+ 11/15
+ -----
+lib/readline/nls.c
+ - _rl_init_locale: new function, split off code that determines current
+ locale from _rl_init_eightbit, use it to set _rl_utf8locale
+
+lib/readline/rlprivate.h
+ - _rl_init_locale: new extern declaration
+
+lib/readline/readline.c
+ - rl_initialize: if not initializing everything the first time, call
+ _rl_init_locale to check current locale and set _rl_utf8locale
+
+lib/readline/text.c
+ - _rl_insert_char: optimize cases where we are inserting a single-byte
+ character in a locale with multibyte characters
+ - _rl_insert_char: check whether character is single byte if we know
+ we are in a UTF-8 locale, optimize single-byte case to avoid calls
+ to mbrtowc and memmove/memcpy
+
+lib/readline/mbutil.c
+ - _rl_char_value: if we are in a UTF-8 locale (_rl_utf8locale) and the
+ current character does not have the eighth bit set ((c & 0x80) == 0),
+ return that char without bothering to call mbrtowc
+ - _rl_adjust_point: don't bother calling mbrlen if we are in a UTF-8
+ locale and the current character is not a multibyte character
+ - _rl_find_next_mbchar_internal: if we are in a UTF-8 locale, use that
+ to avoid calls to mbrtowc
+
+lib/readline/display.c
+ - _rl_col_width: if in a UTF-8 locale, take advantage of that to avoid
+ calls to mbrlen and mbrtowc
+ - rl_redisplay: if in a UTF-8 locale, take advantage of that to avoid
+ calls to mbrtowc
--- /dev/null
+BUILD_DIR=/usr/local/build/chet/bash/bash-current
+THIS_SH=$BUILD_DIR/bash
+PATH=$PATH:$BUILD_DIR
+
+export THIS_SH PATH
+
+export BASH_TSTOUT=/tmp/xx
+rm -f ${BASH_TSTOUT}
+
+/bin/sh "$@"
extern char *xstrchr __P((const char *, int));
extern int locale_mb_cur_max; /* XXX */
+extern int locale_utf8locale; /* XXX */
#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x) ((x) == (size_t)-1 || (x) == (size_t)-2)
_f = is_basic ((_str)[_i]); \
if (_f) \
mblength = 1; \
+ else if (locale_utf8locale && (((_str)[_i] & 0x80) == 0)) \
+ mblength = 1; \
else \
{ \
state_bak = state; \
_k = is_basic (*(_src)); \
if (_k) \
mblength = 1; \
+ else if (locale_utf8locale && ((*(_src) & 0x80) == 0)) \
+ mblength = 1; \
else \
{ \
state_bak = state; \
#define PROMPT_ENDING_INDEX \
((MB_CUR_MAX > 1 && rl_byte_oriented == 0) ? prompt_physical_chars : prompt_last_invisible+1)
-
/* **************************************************************** */
/* */
/* Display stuff */
if (mb_cur_max > 1 && rl_byte_oriented == 0)
{
memset (&ps, 0, sizeof (mbstate_t));
- /* XXX - what if wc_bytes ends up <= 0? check for MB_INVALIDCH */
- wc_bytes = mbrtowc (&wc, rl_line_buffer, rl_end, &ps);
+ if (_rl_utf8locale && UTF8_SINGLEBYTE(rl_line_buffer[0]))
+ {
+ wc = (wchar_t)rl_line_buffer[0];
+ wc_bytes = 1;
+ }
+ else
+ wc_bytes = mbrtowc (&wc, rl_line_buffer, rl_end, &ps);
}
else
wc_bytes = 1;
if (mb_cur_max > 1 && rl_byte_oriented == 0)
{
in += wc_bytes;
- /* XXX - what if wc_bytes ends up <= 0? check for MB_INVALIDCH */
- wc_bytes = mbrtowc (&wc, rl_line_buffer + in, rl_end - in, &ps);
+ if (_rl_utf8locale && UTF8_SINGLEBYTE(rl_line_buffer[in]))
+ {
+ wc = (wchar_t)rl_line_buffer[in];
+ wc_bytes = 1;
+ memset (&ps, 0, sizeof (mbstate_t)); /* re-init state */
+ }
+ else
+ wc_bytes = mbrtowc (&wc, rl_line_buffer + in, rl_end - in, &ps);
}
else
in++;
#endif
-
}
line[out] = '\0';
if (cpos_buffer_position < 0)
while (point < start)
{
- tmp = mbrlen (str + point, max, &ps);
+ if (_rl_utf8locale && UTF8_SINGLEBYTE(str[point]))
+ {
+ memset (&ps, 0, sizeof (mbstate_t));
+ tmp = 1;
+ }
+ else
+ tmp = mbrlen (str + point, max, &ps);
if (MB_INVALIDCH ((size_t)tmp))
{
/* In this case, the bytes are invalid or too short to compose a
while (point < end)
{
- tmp = mbrtowc (&wc, str + point, max, &ps);
+ if (_rl_utf8locale && UTF8_SINGLEBYTE(str[point]))
+ {
+ tmp = 1;
+ wc = (wchar_t) str[point];
+ }
+ else
+ tmp = mbrtowc (&wc, str + point, max, &ps);
if (MB_INVALIDCH ((size_t)tmp))
{
/* In this case, the bytes are invalid or too short to compose a
len = strlen (string + point);
if (len == 0)
break;
- tmp = mbrtowc (&wc, string+point, len, &ps);
+ if (_rl_utf8locale && UTF8_SINGLEBYTE(string[point]))
+ {
+ tmp = 1;
+ wc = (wchar_t) string[point];
+ memset(&ps, 0, sizeof(mbstate_t));
+ }
+ else
+ tmp = mbrtowc (&wc, string+point, len, &ps);
if (MB_INVALIDCH ((size_t)tmp))
{
/* invalid bytes. assume a byte represents a character */
tmp = mbrtowc (&wc, string + point, length - point, &ps);
if (MB_INVALIDCH ((size_t)tmp))
{
- /* in this case, bytes are invalid or shorted to compose
+ /* in this case, bytes are invalid or too short to compose
multibyte char, so assume that the first byte represents
a single character anyway. */
tmp = 1;
while (pos < point)
{
- tmp = mbrlen (string + pos, length - pos, ps);
+ if (_rl_utf8locale && UTF8_SINGLEBYTE(string[pos]))
+ tmp = 1;
+ else
+ tmp = mbrlen (string + pos, length - pos, ps);
if (MB_INVALIDCH ((size_t)tmp))
{
- /* in this case, bytes are invalid or shorted to compose
+ /* in this case, bytes are invalid or too short to compose
multibyte char, so assume that the first byte represents
a single character anyway. */
pos++;
if (MB_LEN_MAX == 1 || rl_byte_oriented)
return ((wchar_t) buf[ind]);
+ if (_rl_utf8locale && UTF8_SINGLEBYTE(buf[ind]))
+ return ((wchar_t) buf[ind]);
l = strlen (buf);
if (ind >= l - 1)
return ((wchar_t) buf[ind]);
"iso88599",
"iso885910",
"koi8r",
+ "utf8",
0
};
#endif
}
-/* Check for LC_ALL, LC_CTYPE, and LANG and use the first with a value
- to decide the defaults for 8-bit character input and output. Returns
- 1 if we set eight-bit mode. */
-int
-_rl_init_eightbit ()
+/* Query the right environment variables and call setlocale() to initialize
+ the C library locale settings. */
+char *
+_rl_init_locale ()
{
-/* If we have setlocale(3), just check the current LC_CTYPE category
- value, and go into eight-bit mode if it's not C or POSIX. */
-#if defined (HAVE_SETLOCALE)
- char *lspec, *t;
+ char *ret, *lspec;
/* Set the LC_CTYPE locale category from environment variables. */
lspec = _rl_get_locale_var ("LC_CTYPE");
lspec = setlocale (LC_CTYPE, (char *)NULL);
if (lspec == 0)
lspec = "";
- t = setlocale (LC_CTYPE, lspec);
+ ret = setlocale (LC_CTYPE, lspec); /* ok, since it does not change locale */
+
+ _rl_utf8locale = (ret && *ret) ? utf8locale (ret) : 0;
+
+ return ret;
+}
+
+/* Check for LC_ALL, LC_CTYPE, and LANG and use the first with a value
+ to decide the defaults for 8-bit character input and output. Returns
+ 1 if we set eight-bit mode. */
+int
+_rl_init_eightbit ()
+{
+/* If we have setlocale(3), just check the current LC_CTYPE category
+ value, and go into eight-bit mode if it's not C or POSIX. */
+#if defined (HAVE_SETLOCALE)
+ char *lspec, *t;
- if (t && *t)
- _rl_utf8locale = utf8locale (t);
+ t = _rl_init_locale (); /* returns static pointer */
if (t && *t && (t[0] != 'C' || t[1]) && (STREQ (t, "POSIX") == 0))
{
_rl_output_meta_chars = 1;
break;
}
+
+ _rl_utf8locale = *t ? STREQ (t, "utf8") : 0;
+
xfree (t);
return (legal_lang_values[i] ? 1 : 0);
-
#endif /* !HAVE_SETLOCALE */
}
{
/* If we have never been called before, initialize the
terminal and data structures. */
- if (!rl_initialized)
+ if (rl_initialized == 0)
{
RL_SETSTATE(RL_STATE_INITIALIZING);
readline_initialize_everything ();
rl_initialized++;
RL_SETSTATE(RL_STATE_INITIALIZED);
}
+ else
+ (void)_rl_init_locale (); /* check current locale */
/* Initialize the current line information. */
_rl_init_line_state ();
# define IS_COMBINING_CHAR(x) (WCWIDTH(x) == 0)
#endif
+#define UTF8_SINGLEBYTE(c) (((c) & 0x80) == 0)
+
#else /* !HANDLE_MULTIBYTE */
#undef MB_LEN_MAX
#define MB_INVALIDCH(x) (0)
#define MB_NULLWCH(x) (0)
+#define UTF8_SINGLEBYTE(c) (1)
+
#endif /* !HANDLE_MULTIBYTE */
extern int rl_byte_oriented;
extern void _rl_revert_all_lines PARAMS((void));
/* nls.c */
+extern char *_rl_init_locale PARAMS((void));
extern int _rl_init_eightbit PARAMS((void));
/* parens.c */
incoming[1] = '\0';
incoming_length = 1;
}
+ else if (_rl_utf8locale && (c & 0x80) == 0)
+ {
+ incoming[0] = c;
+ incoming[1] = '\0';
+ incoming_length = 1;
+ }
else
{
wchar_t wc;
effect of mbstate is undefined. */
memset (&ps, 0, sizeof (mbstate_t));
}
+ else if (ret == 1)
+ {
+ incoming[0] = pending_bytes[0];
+ incoming[incoming_length = 1] = '\0';
+ pending_bytes_length = 0;
+ }
else
{
/* We successfully read a single multibyte character. */
i = 0;
while (i < string_size)
{
- strncpy (string + i, incoming, incoming_length);
- i += incoming_length;
+ if (incoming_length == 1)
+ string[i++] = *incoming;
+ else
+ {
+ strncpy (string + i, incoming, incoming_length);
+ i += incoming_length;
+ }
}
incoming_length = 0;
stored_count = 0;
i = 0;
while (i < string_size)
{
- strncpy (string + i, incoming, incoming_length);
- i += incoming_length;
+ if (incoming_length == 1)
+ string[i++] = *incoming;
+ else
+ {
+ strncpy (string + i, incoming, incoming_length);
+ i += incoming_length;
+ }
}
while (count)
int locale_utf8locale; /* set but unused for now */
int locale_mb_cur_max; /* value of MB_CUR_MAX for current locale (LC_CTYPE) */
+int locale_shiftstates;
extern int dump_translatable_strings, dump_po_strings;
locale_mb_cur_max = MB_CUR_MAX;
locale_utf8locale = locale_isutf8 (default_locale);
+ locale_shiftstates = mblen ((char *)NULL, 0);
}
/* Set default values for LC_CTYPE, LC_COLLATE, LC_MESSAGES, LC_NUMERIC and
locale_setblanks ();
locale_mb_cur_max = MB_CUR_MAX;
locale_utf8locale = locale_isutf8 (lc_all);
+ locale_shiftstates = mblen ((char *)NULL, 0);
u32reset ();
}
# endif
/* if LC_ALL == "", reset_locale_vars has already called this */
if (*lc_all && x)
locale_utf8locale = locale_isutf8 (lc_all);
+ locale_shiftstates = mblen ((char *)NULL, 0);
u32reset ();
return r;
#else
/* if setlocale() returns NULL, the locale is not changed */
if (x)
locale_utf8locale = locale_isutf8 (x);
+ locale_shiftstates = mblen ((char *)NULL, 0);
u32reset ();
}
# endif
locale_mb_cur_max = MB_CUR_MAX;
if (x)
locale_utf8locale = locale_isutf8 (x);
+ locale_shiftstates = mblen ((char *)NULL, 0);
u32reset ();
#endif
return 1;
regexp `^#define[ ]*PATCHLEVEL', since that's what support/mkversion.sh
looks for to find the patch level (for the sccs version string). */
-#define PATCHLEVEL 0
+#define PATCHLEVEL 5
#endif /* _PATCHLEVEL_H_ */
extern int shell_compatibility_level;
extern int locale_mb_cur_max;
+extern int locale_utf8locale;
/* Structure to pass around that holds a bitmap of file descriptors
to close, and the size of that structure. Used in execute_cmd.c. */
-BUILD_DIR=/usr/local/build/chet/bash/bash-current
+BUILD_DIR=/usr/local/build/bash/bash-current
THIS_SH=$BUILD_DIR/bash
PATH=$PATH:$BUILD_DIR
argv[1] = <A>
argv[1] = <B>
argv[1] = <a£\b>
-0000000 141 243 134 142
+0000000 141 243 134 142
0000004
ok 6
ok 7
+. ./test-glue-functions
+
var='ab\'
case $var in
unset a b v
recho "a${alpha}b"
-printf "%s" "a${alpha}b" | LC_ALL=C od -b
+printf "%s" "a${alpha}b" | LC_ALL=C od -b | _intl_normalize_spaces
a=$'\u3b1'
[[ $a = $a ]] && echo ok 6