column = 0;
else if (g.ch == '\t')
column += TAB_WIDTH - column % TAB_WIDTH;
- else /* if (c32isprint (g.ch)) */
+ else
{
- last_character_width = (counting_mode == COUNT_CHARACTERS
- ? 1 : c32width (g.ch));
+ if (counting_mode == COUNT_CHARACTERS)
+ last_character_width = 1;
+ else
+ {
+ int width = c32width (g.ch);
+ /* Default to a width of 1 if there is an invalid character. */
+ last_character_width = width < 0 ? 1 : width;
+ }
column += last_character_width;
}
}
fadvise (istream, FADVISE_SEQUENTIAL);
while (0 < (length_in = fread (line_in + offset_in, 1,
- sizeof line_in - offset_in, istream)))
+ sizeof line_in - offset_in, istream))
+ || 0 < offset_in)
{
char *p = line_in;
char *lim = p + length_in + offset_in;
{
/* Replace the character with the byte if it cannot be a
truncated multibyte sequence. */
- if (!(lim - p <= MCEL_LEN_MAX))
+ if (!(lim - p <= MCEL_LEN_MAX) || length_in == 0)
g.ch = p[0];
else
{
fold --characters input3 | tail -n 4 > out3 || fail=1
compare exp3 out3 || fail=1
+# Sequence derived from <https://datatracker.ietf.org/doc/rfc9839>.
+bad_unicode ()
+{
+ # Fields separated by '|': invalid UTF-8|unpaired surrogate|NUL|C1 control|noncharacter
+ env printf '\xC3|\xED\xBA\xAD|\u0000|\u0089|\xED\xA6\xBF\xED\xBF\xBF\n'
+}
+bad_unicode > /dev/null || framework_failure_
+test $({ bad_unicode | fold; bad_unicode; } | uniq | wc -l) = 1 || fail=1
+# Check that an incomplete multibyte sequence at EOF is still output
+test $(env printf '\xC3' | fold | wc -c) = 1 || fail=1
+
# Ensure bounded memory operation
vm=$(get_min_ulimit_v_ fold /dev/null) && {
yes | tr -d '\n' | (ulimit -v $(($vm+8000)) && fold 2>err) | head || fail=1