#include "system.h"
#include "fadvise.h"
+#include "mcel.h"
#include "xdectoint.h"
#define TAB_WIDTH 8
/* If nonzero, try to break on whitespace. */
static bool break_spaces;
-/* If nonzero, count bytes, not column positions. */
-static bool count_bytes;
+/* Unit in which line width is measured; COUNT_COLUMNS unless -b or -c. */
+static enum
+ {
+ COUNT_COLUMNS, /* Screen columns (the initial value). */
+ COUNT_BYTES, /* Bytes; selected by -b. */
+ COUNT_CHARACTERS /* Characters; selected by -c. */
+ } counting_mode = COUNT_COLUMNS;
/* If nonzero, at least one of the files we read was standard input. */
static bool have_read_stdin;
-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
+/* Width adjust_column last added for a character; backspace subtracts it. */
+static int last_character_width = 0;
+
+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
static struct option const longopts[] =
{
{"bytes", no_argument, nullptr, 'b'},
+ {"characters", no_argument, nullptr, 'c'},
{"spaces", no_argument, nullptr, 's'},
{"width", required_argument, nullptr, 'w'},
{GETOPT_HELP_OPTION_DECL},
fputs (_("\
-b, --bytes count bytes rather than columns\n\
+ -c, --characters count characters rather than columns\n\
-s, --spaces break at spaces\n\
-w, --width=WIDTH use WIDTH columns instead of 80\n\
"), stdout);
The first column is 0. */
static size_t
-adjust_column (size_t column, char c)
+adjust_column (size_t column, mcel_t g)
{
- if (!count_bytes)
+  /* In byte mode every character simply advances by its encoded length;
+     otherwise backspace, carriage return and tab get special handling.  */
+  if (counting_mode != COUNT_BYTES)
{
- if (c == '\b')
+      if (g.ch == '\b')
{
if (column > 0)
- column--;
+            {
+              /* Step back over the previous character, but never past
+                 column 0.  COLUMN is unsigned, and the recorded width can
+                 exceed it (e.g. a narrow character, a wide character,
+                 then two backspaces), which would otherwise wrap around.  */
+              size_t step = last_character_width;
+              column -= step < column ? step : column;
+            }
}
- else if (c == '\r')
+      else if (g.ch == '\r')
column = 0;
- else if (c == '\t')
+      else if (g.ch == '\t')
column += TAB_WIDTH - column % TAB_WIDTH;
- else /* if (isprint (c)) */
- column++;
+      else /* if (c32isprint (g.ch)) */
+        {
+          if (counting_mode == COUNT_CHARACTERS)
+            last_character_width = 1;
+          else
+            {
+              /* c32width reports a negative width for characters it
+                 considers non-printable.  Count those as one column, as
+                 the byte-oriented code did, instead of letting a negative
+                 value move the unsigned column backwards.  */
+              int width = c32width (g.ch);
+              last_character_width = width < 0 ? 1 : width;
+            }
+          column += last_character_width;
+        }
}
else
- column++;
+    column += g.len;
return column;
}
fold_file (char const *filename, size_t width)
{
FILE *istream;
- int c;
size_t column = 0; /* Screen column where next char will go. */
idx_t offset_out = 0; /* Index in 'line_out' for next char. */
static char *line_out = nullptr;
static idx_t allocated_out = 0;
+ static char *line_in = nullptr;
+ static size_t allocated_in = 0;
+ static ssize_t length_in = 0;
int saved_errno;
if (STREQ (filename, "-"))
fadvise (istream, FADVISE_SEQUENTIAL);
- while ((c = getc (istream)) != EOF)
+ while (0 <= (length_in = getline (&line_in, &allocated_in, istream)))
{
- if (allocated_out - offset_out <= 1)
- line_out = xpalloc (line_out, &allocated_out, 1, -1, sizeof *line_out);
-
- if (c == '\n')
+ char *p = line_in;
+ char *lim = p + length_in;
+ mcel_t g;
+ for (; p < lim; p += g.len)
{
- line_out[offset_out++] = c;
- fwrite (line_out, sizeof (char), offset_out, stdout);
- column = offset_out = 0;
- continue;
- }
-
- rescan:
- column = adjust_column (column, c);
-
- if (column > width)
- {
- /* This character would make the line too long.
- Print the line plus a newline, and make this character
- start the next line. */
- if (break_spaces)
+ g = mcel_scan (p, lim);
+ if (allocated_out - offset_out <= g.len)
+ line_out = xpalloc (line_out, &allocated_out, g.len, -1,
+ sizeof *line_out);
+ if (g.ch == '\n')
{
- bool found_blank = false;
- idx_t logical_end = offset_out;
+ memcpy (line_out + offset_out, p, g.len);
+ offset_out += g.len;
+ fwrite (line_out, sizeof (char), offset_out, stdout);
+ column = offset_out = 0;
+ continue;
+ }
+ rescan:
+ column = adjust_column (column, g);
- /* Look for the last blank. */
- while (logical_end)
+ if (column > width)
+ {
+ /* This character would make the line too long.
+ Print the line plus a newline, and make this character
+ start the next line. */
+ if (break_spaces)
{
- --logical_end;
- if (isblank (to_uchar (line_out[logical_end])))
+ int space_length = 0;
+ idx_t logical_end = offset_out;
+ char *logical_p = line_out;
+ char *logical_lim = logical_p + logical_end;
+
+ for (mcel_t g2; logical_p < logical_lim; logical_p += g2.len)
{
- found_blank = true;
- break;
+ g2 = mcel_scan (logical_p, logical_lim);
+ if (c32isblank (g2.ch))
+ {
+ space_length = g2.len;
+ logical_end = logical_p - line_out;
+ }
+ }
+
+ if (space_length)
+ {
+ logical_end += space_length;
+ /* Found a blank. Don't output the part after it. */
+ fwrite (line_out, sizeof (char), logical_end, stdout);
+ putchar ('\n');
+ /* Move the remainder to the beginning of the next line.
+ The areas being copied here might overlap. */
+ memmove (line_out, line_out + logical_end,
+ offset_out - logical_end);
+ offset_out -= logical_end;
+ column = 0;
+ char *printed_p = line_out;
+ char *printed_lim = printed_p + offset_out;
+ for (mcel_t g2; printed_p < printed_lim;
+ printed_p += g2.len)
+ {
+ g2 = mcel_scan (printed_p, printed_lim);
+ column = adjust_column (column, g2);
+ }
+ goto rescan;
}
}
- if (found_blank)
+ if (offset_out == 0)
{
- /* Found a blank. Don't output the part after it. */
- logical_end++;
- fwrite (line_out, sizeof (char), logical_end, stdout);
- putchar ('\n');
- /* Move the remainder to the beginning of the next line.
- The areas being copied here might overlap. */
- memmove (line_out, line_out + logical_end,
- offset_out - logical_end);
- offset_out -= logical_end;
- column = 0;
- for (idx_t i = 0; i < offset_out; i++)
- column = adjust_column (column, line_out[i]);
- goto rescan;
+ memcpy (line_out + offset_out, p, g.len);
+ offset_out += g.len;
+ continue;
}
- }
- if (offset_out == 0)
- {
- line_out[offset_out++] = c;
- continue;
+ line_out[offset_out++] = '\n';
+ fwrite (line_out, sizeof (char), offset_out, stdout);
+ column = offset_out = 0;
+ goto rescan;
}
- line_out[offset_out++] = '\n';
- fwrite (line_out, sizeof (char), offset_out, stdout);
- column = offset_out = 0;
- goto rescan;
+ memcpy (line_out + offset_out, p, g.len);
+ offset_out += g.len;
}
-
- line_out[offset_out++] = c;
}
saved_errno = errno;
atexit (close_stdout);
- break_spaces = count_bytes = have_read_stdin = false;
+ break_spaces = have_read_stdin = false;
while ((optc = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1)
{
switch (optc)
{
case 'b': /* Count bytes rather than columns. */
- count_bytes = true;
+ counting_mode = COUNT_BYTES;
+ break;
+
+ case 'c': /* Count characters rather than columns. */
+ counting_mode = COUNT_CHARACTERS;
break;
case 's': /* Break at word boundaries. */
--- /dev/null
+#!/bin/sh
+# Test fold --characters.
+
+# Copyright (C) 2025 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ fold printf
+
+# Everything below feeds multibyte text to fold, so a UTF-8 locale is needed.
+if test "$LOCALE_FR_UTF8" = none; then
+  skip_ "French UTF-8 locale not available"
+fi
+
+LC_ALL=$LOCALE_FR_UTF8
+export LC_ALL
+
+# Case 1: "뉐뉐뉐" is 3 characters wide when counting characters,
+# but takes up 6 screen columns.
+env printf '\uB250\uB250\uB250\n' > input1 || framework_failure_
+env printf '\uB250\uB250\n\uB250\n' > column-exp1 || framework_failure_
+
+# Counting columns, only two of the characters fit in a width of 5.
+fold -w 5 input1 > column-out1 || fail=1
+compare column-exp1 column-out1 || fail=1
+
+# Counting characters, all three fit, so the input passes through unchanged.
+fold --characters -w 5 input1 > characters-out1 || fail=1
+compare input1 characters-out1 || fail=1
+
+# Case 2: a single line of 50 characters, each 2 columns wide.
+# The expected outputs are built alongside the input: a newline after
+# every 5th character for column counting, after every 10th for
+# character counting.
+for i in $(seq 50); do
+  for f in input2 column-exp2 character-exp2; do
+    env printf '\uFF1A' >> $f || framework_failure_
+  done
+  case $(($i % 5)) in
+    0) env printf '\n' >> column-exp2 || framework_failure_ ;;
+  esac
+  case $(($i % 10)) in
+    0) env printf '\n' >> character-exp2 || framework_failure_ ;;
+  esac
+done
+
+env printf '\n' >> input2 || framework_failure_
+
+# Width 10 in columns: 5 characters per line.
+fold -w 10 input2 > column-out2 || fail=1
+compare column-exp2 column-out2 || fail=1
+
+# Width 10 in characters: 10 characters per line.
+fold --characters -w 10 input2 > character-out2 || fail=1
+compare character-exp2 character-out2 || fail=1
+
+Exit $fail