#include "argmatch.h"
#include "assure.h"
+#include "c-ctype.h"
#include "fadvise.h"
#include "getndelim2.h"
#include "ioblksize.h"
#include "mbbuf.h"
+#include "memchr2.h"
#include "set-fields.h"
struct bytesearch_context
{
enum bytesearch_mode mode;
+ bool blank_delimited; /* If true, a field ends at the first space or tab (memchr2 search) rather than where find_bytesearch_field_delim reports.  */
char *line_end;
bool line_end_known;
};
idx_t field_len = ctx->line_end ? ctx->line_end - buf : len;
- char *field_end = find_bytesearch_field_delim (buf, field_len);
+ char *field_end = (ctx->blank_delimited
+ ? memchr2 (buf, ' ', '\t', field_len)
+ : find_bytesearch_field_delim (buf, field_len));
if (field_end)
{
bool found_any_selected_field = false;
bool have_pending_line = false;
bool skip_line_remainder = false;
+ bool skip_blank_run = false;
bool write_field;
idx_t field_1_n_bytes = 0;
- idx_t overlap = delim_length - 1;
+ idx_t overlap = whitespace_delimited ? 0 : delim_length - 1;
current_rp = frp;
buffer_first_field = suppress_non_delimited ^ !print_kth (1);
}
char *chunk = mbbuf.buffer + mbbuf.offset;
- struct bytesearch_context search = { .mode = BYTESEARCH_FIELDS };
+ struct bytesearch_context search =
+ {
+ .mode = BYTESEARCH_FIELDS,
+ .blank_delimited = whitespace_delimited
+ };
while (processed < safe)
{
char *terminator = NULL;
+ if (skip_blank_run) /* A blank run is still being consumed.  */
+ {
+ while (processed < safe && c_isblank (chunk[processed])) /* Step over consecutive blank bytes, never past SAFE.  */
+ processed++;
+
+ if (processed == safe) /* Ran out of bytes before seeing a non-blank.  */
+ break; /* skip_blank_run stays set here; presumably the skip resumes once more data is available -- confirm against the enclosing loop.  */
+
+ skip_blank_run = false; /* A non-blank byte was reached, so the run is over.  */
+ }
+
if (skip_line_remainder)
{
search.mode = BYTESEARCH_LINE_ONLY;
field_1_n_bytes = 0;
}
- processed += delim_length;
+ processed += whitespace_delimited ? 1 : delim_length;
next_item (&field_idx);
write_field = begin_field_output (field_idx, buffer_first_field,
&found_any_selected_field);
skip_line_remainder = true;
}
+ else if (whitespace_delimited)
+ skip_blank_run = true;
}
else if (terminator_kind == FIELD_LINE_DELIMITER)
{
static void
cut_fields_ws (FILE *stream)
{
- cut_fields_mb_any (stream, true);
+ if (MB_CUR_MAX <= 1 && !trim_outer_whitespace) /* Locale characters are at most one byte and trim_outer_whitespace is off.  */
+ cut_fields_bytesearch (stream); /* Hand STREAM to the byte-search implementation.  */
+ else
+ cut_fields_mb_any (stream, true); /* Otherwise make the same call that was unconditional before this change.  */
}
/* Process file FILE to standard output, using CUT_STREAM.