From: Paul Eggert Date: Wed, 15 Sep 2021 21:09:03 +0000 (-0700) Subject: maint: prefer rawmemchr to memchr when easy X-Git-Tag: v9.0~27 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2715aba08a381a6099c1c6b054995e6b3df785c8;p=thirdparty%2Fcoreutils.git maint: prefer rawmemchr to memchr when easy * bootstrap.conf (gnulib_modules): Add rawmemchr. * src/csplit.c: Include idx.h. * src/csplit.c (record_line_starts): * src/head.c (elide_tail_lines_pipe): * src/shuf.c (next_line): * src/split.c (lines_split): * src/tail.c (pipe_lines): * src/wc.c (wc_lines): Prefer rawmemchr to memchr when rawmemchr is easy. * src/csplit.c (load_buffer): * src/head.c (struct linebuffer): Make room for a 1-byte sentinel. --- diff --git a/bootstrap.conf b/bootstrap.conf index 481a37e9c3..bcfc6f0a0c 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -210,6 +210,7 @@ gnulib_modules=" quotearg randint randperm + rawmemchr read-file readlink readtokens diff --git a/src/csplit.c b/src/csplit.c index e1fb66ed24..a7191fedd2 100644 --- a/src/csplit.c +++ b/src/csplit.c @@ -31,6 +31,7 @@ #include "die.h" #include "error.h" #include "fd-reopen.h" +#include "idx.h" #include "quote.h" #include "safe-read.h" #include "stdio--.h" @@ -344,8 +345,6 @@ static size_t record_line_starts (struct buffer_record *b) { char *line_start; /* Start of current line. */ - char *line_end; /* End of each line found. */ - size_t bytes_left; /* Length of incomplete last line. */ size_t lines; /* Number of lines found. */ size_t line_length; /* Length of each line found. */ @@ -354,21 +353,22 @@ record_line_starts (struct buffer_record *b) lines = 0; line_start = b->buffer; - bytes_left = b->bytes_used; + char *buffer_end = line_start + b->bytes_used; + *buffer_end = '\n'; while (true) { - line_end = memchr (line_start, '\n', bytes_left); - if (line_end == NULL) + char *line_end = rawmemchr (line_start, '\n'); + if (line_end == buffer_end) break; line_length = line_end - line_start + 1; keep_new_line (b, line_start, line_length); - bytes_left -= line_length; line_start = line_end + 1; lines++; } /* Check for an incomplete last line. */ + idx_t bytes_left = buffer_end - line_start; if (bytes_left) { if (have_read_eof) @@ -492,9 +492,10 @@ load_buffer (void) return false; /* We must make the buffer at least as large as the amount of data - in the partial line left over from the last call. */ - if (bytes_wanted < hold_count) - bytes_wanted = hold_count; + in the partial line left over from the last call, + plus room for a sentinel '\n'. */ + if (bytes_wanted <= hold_count) + bytes_wanted = hold_count + 1; while (true) { @@ -512,7 +513,7 @@ load_buffer (void) hold_count = 0; } - b->bytes_used += read_input (p, bytes_avail); + b->bytes_used += read_input (p, bytes_avail - 1); lines_found = record_line_starts (b); diff --git a/src/head.c b/src/head.c index 7b2a440418..04d0cd8aa9 100644 --- a/src/head.c +++ b/src/head.c @@ -500,7 +500,7 @@ elide_tail_lines_pipe (char const *filename, int fd, uintmax_t n_elide, { struct linebuffer { - char buffer[BUFSIZ]; + char buffer[BUFSIZ + 1]; size_t nbytes; size_t nlines; struct linebuffer *next; @@ -539,9 +539,10 @@ elide_tail_lines_pipe (char const *filename, int fd, uintmax_t n_elide, /* Count the number of newlines just read. */ { - char const *buffer_end = tmp->buffer + n_read; + char *buffer_end = tmp->buffer + n_read; + *buffer_end = line_end; char const *p = tmp->buffer; - while ((p = memchr (p, line_end, buffer_end - p))) + while ((p = rawmemchr (p, line_end)) < buffer_end) { ++p; ++tmp->nlines; diff --git a/src/shuf.c b/src/shuf.c index 1af1b533ad..553b293503 100644 --- a/src/shuf.c +++ b/src/shuf.c @@ -134,13 +134,13 @@ input_from_argv (char **operand, int n_operands, char eolbyte) operand[n_operands] = p; } -/* Return the start of the next line after LINE. The current line - ends in EOLBYTE, and is guaranteed to end before LINE + N. */ +/* Return the start of the next line after LINE, which is guaranteed + to end in EOLBYTE. */ static char * -next_line (char *line, char eolbyte, size_t n) +next_line (char *line, char eolbyte) { - char *p = memchr (line, eolbyte, n); + char *p = rawmemchr (line, eolbyte); return p + 1; } @@ -284,14 +284,14 @@ read_input (FILE *in, char eolbyte, char ***pline) lim = buf + used; n_lines = 0; - for (p = buf; p < lim; p = next_line (p, eolbyte, lim - p)) + for (p = buf; p < lim; p = next_line (p, eolbyte)) n_lines++; *pline = line = xnmalloc (n_lines + 1, sizeof *line); line[0] = p = buf; for (size_t i = 1; i <= n_lines; i++) - line[i] = p = next_line (p, eolbyte, lim - p); + line[i] = p = next_line (p, eolbyte); return n_lines; } diff --git a/src/split.c b/src/split.c index 6062f052a1..4b1b144d06 100644 --- a/src/split.c +++ b/src/split.c @@ -716,7 +716,7 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize) *eob = eolchar; while (true) { - bp = memchr (bp, eolchar, eob - bp + 1); + bp = rawmemchr (bp, eolchar); if (bp == eob) { if (eob != bp_out) /* do not write 0 bytes! */ diff --git a/src/tail.c b/src/tail.c index 99977afa7b..eb15b933f3 100644 --- a/src/tail.c +++ b/src/tail.c @@ -713,8 +713,7 @@ pipe_lines (char const *pretty_filename, int fd, uintmax_t n_lines, size_t j; for (j = total_lines - n_lines; j; --j) { - beg = memchr (beg, line_end, buffer_end - beg); - assert (beg); + beg = rawmemchr (beg, line_end); ++beg; } } diff --git a/src/wc.c b/src/wc.c index bdb51928d8..ec2a4e1841 100644 --- a/src/wc.c +++ b/src/wc.c @@ -319,8 +319,9 @@ wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out) } else { - /* memchr is more efficient with longer lines. */ - while ((p = memchr (p, '\n', end - p))) + /* rawmemchr is more efficient with longer lines. */ + *end = '\n'; + while ((p = rawmemchr (p, '\n')) < end) { ++p; ++lines;