#include "assure.h"
#include "fadvise.h"
#include "getndelim2.h"
+#include "ioblksize.h"
+#include "mbbuf.h"
#include "set-fields.h"
while (0)
-/* Pointer inside RP. When checking if a byte or field is selected
+/* Pointer inside RP. When checking whether a -b, -c, or -f index is selected
by a finite range, we check if it is between CURRENT_RP.LO
- and CURRENT_RP.HI. If the byte or field index is greater than
+ and CURRENT_RP.HI. If the index is greater than
CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */
static struct field_range_pair *current_rp;
/* True if we have ever read standard input. */
static bool have_read_stdin;
+/* Whether to cut bytes, characters, or fields. Starts as CUT_MODE_NONE
+ and is assigned while the -b, -c and -f options are parsed; the mode
+ then selects which stream-cutting function is run. */
+static enum
+{
+ CUT_MODE_NONE, /* Initial value: no -b, -c, or -f processed yet. */
+ CUT_MODE_BYTES, /* Assigned when -b is parsed. */
+ CUT_MODE_CHARACTERS, /* Assigned when -c is parsed. */
+ CUT_MODE_FIELDS /* Assigned when -f is parsed. */
+} cut_mode = CUT_MODE_NONE;
+
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
}
}
+/* Read from STREAM, printing to standard output any selected characters.
+ Input is consumed one mcel_t at a time through an mbbuf reader, and each
+ selected item is written as its original G.LEN bytes, so an item may be
+ more than one byte long. Every LINE_DELIM read is echoed and resets the
+ per-line state. When the output delimiter string is not the default one,
+ it is written between selected ranges. write_error is called whenever a
+ write to standard output fails. */
+
+static void
+cut_characters (FILE *stream)
+{
+ uintmax_t char_idx = 0; /* Position within the current line; 0 at line start. */
+ bool print_delimiter = false; /* Set once a selected character has been output on this line. */
+ static char line_in[IO_BUFSIZE]; /* Storage handed to mbbuf_init below. */
+ mbbuf_t mbbuf;
+
+ current_rp = frp; /* Restart range matching from FRP. */
+ mbbuf_init (&mbbuf, line_in, sizeof line_in, stream);
+
+ while (true)
+ {
+ mcel_t g = mbbuf_get_char (&mbbuf);
+
+ if (g.ch == line_delim) /* End of line: echo it and reset per-line state. */
+ {
+ if (putchar (line_delim) < 0)
+ write_error ();
+ char_idx = 0;
+ print_delimiter = false;
+ current_rp = frp;
+ }
+ else if (g.ch == MBBUF_EOF) /* Reader reported MBBUF_EOF: finish and leave the loop. */
+ {
+ if (char_idx > 0) /* Presumably a last line lacking LINE_DELIM; supply one. */
+ {
+ if (putchar (line_delim) < 0)
+ write_error ();
+ }
+ break;
+ }
+ else
+ {
+ next_item (&char_idx); /* Presumably advances CHAR_IDX (and CURRENT_RP) -- confirm in next_item. */
+ if (print_kth (char_idx))
+ {
+ if (output_delimiter_string != output_delimiter_default) /* Range separators are written only in this case. */
+ {
+ if (print_delimiter && is_range_start_index (char_idx)) /* Never before the first output on a line. */
+ {
+ if (fwrite (output_delimiter_string, sizeof (char),
+ output_delimiter_length, stdout)
+ != output_delimiter_length)
+ write_error ();
+ }
+ print_delimiter = true;
+ }
+
+ if (fwrite (mbbuf_char_offset (&mbbuf, g), sizeof (char), g.len, /* Presumably the address of G's bytes in the buffer. */
+ stdout)
+ != g.len)
+ write_error ();
+ }
+ }
+ }
+}
+
/* Read from stream STREAM, printing to standard output any selected fields. */
static void
int optc;
bool ok;
bool delim_specified = false;
- bool byte_mode = false;
char *spec_list_string = NULL;
initialize_main (&argc, &argv);
switch (optc)
{
case 'b':
+ cut_mode = CUT_MODE_BYTES;
+ FALLTHROUGH;
case 'c':
- /* Build the byte list. */
- byte_mode = true;
+ if (optc == 'c')
+ cut_mode = CUT_MODE_CHARACTERS;
FALLTHROUGH;
case 'f':
- /* Build the field list. */
+ if (optc == 'f')
+ cut_mode = CUT_MODE_FIELDS;
if (spec_list_string)
FATAL_ERROR (_("only one list may be specified"));
spec_list_string = optarg;
if (!spec_list_string)
FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
- if (byte_mode)
+ if (cut_mode == CUT_MODE_BYTES || cut_mode == CUT_MODE_CHARACTERS)
{
if (delim_specified)
FATAL_ERROR (_("an input delimiter may be specified only\
}
set_fields (spec_list_string,
- ((byte_mode ? SETFLD_ERRMSG_USE_POS : 0)
+ (((cut_mode == CUT_MODE_BYTES
+ || cut_mode == CUT_MODE_CHARACTERS)
+ ? SETFLD_ERRMSG_USE_POS : 0)
| (complement ? SETFLD_COMPLEMENT : 0)));
if (!delim_specified)
output_delimiter_length = 1;
}
- void (*cut_stream) (FILE *) = byte_mode ? cut_bytes : cut_fields;
+ void (*cut_stream) (FILE *) = NULL;
+ switch (cut_mode)
+ {
+ case CUT_MODE_NONE:
+ unreachable ();
+
+ case CUT_MODE_BYTES:
+ cut_stream = cut_bytes;
+ break;
+
+ case CUT_MODE_CHARACTERS:
+ cut_stream = MB_CUR_MAX <= 1 ? cut_bytes : cut_characters;
+ break;
+
+ case CUT_MODE_FIELDS:
+ cut_stream = cut_fields;
+ break;
+ }
+ affirm (cut_stream);
if (optind == argc)
ok = cut_file ("-", cut_stream);
else
push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];
}
push @Tests, @new;
+
+ push @Tests,
+ ['mb-char-1', '-c1', {IN=>"\xc3\xa9x\n"}, {OUT=>"\xc3\xa9\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
+ ['mb-char-2', '-c2', {IN=>"\xc3\xa9x\n"}, {OUT=>"x\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
+ ['mb-char-3', '-c1,3', '--output-d=:',
+ {IN=>"\xc3\xa9a\xe2\x82\xacb\n"}, {OUT=>"\xc3\xa9:\xe2\x82\xac\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
+ ['mb-char-4', '-c1-2', {IN=>"\xc3x\n"}, {OUT=>"\xc3x\n"},
+ {ENV => "LC_ALL=$mb_locale"}];
}