From: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed, 15 Sep 2021 21:09:03 +0000 (-0700)
Subject: maint: prefer rawmemchr to memchr when easy
X-Git-Tag: v9.0~27
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2715aba08a381a6099c1c6b054995e6b3df785c8;p=thirdparty%2Fcoreutils.git

maint: prefer rawmemchr to memchr when easy

* bootstrap.conf (gnulib_modules): Add rawmemchr.
* src/csplit.c: Include idx.h.
* src/csplit.c (record_line_starts):
* src/head.c (elide_tail_lines_pipe):
* src/shuf.c (next_line):
* src/split.c (lines_split):
* src/tail.c (pipe_lines):
* src/wc.c (wc_lines):
Prefer rawmemchr to memchr when rawmemchr is easy, i.e., when the
buffer already ends in, or can cheaply be given, a sentinel byte.
* src/csplit.c (load_buffer):
* src/head.c (struct linebuffer):
Make room for a 1-byte sentinel.
---

diff --git a/bootstrap.conf b/bootstrap.conf
index 481a37e9c3..bcfc6f0a0c 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -210,6 +210,7 @@ gnulib_modules="
   quotearg
   randint
   randperm
+  rawmemchr
   read-file
   readlink
   readtokens
diff --git a/src/csplit.c b/src/csplit.c
index e1fb66ed24..a7191fedd2 100644
--- a/src/csplit.c
+++ b/src/csplit.c
@@ -31,6 +31,7 @@
 #include "die.h"
 #include "error.h"
 #include "fd-reopen.h"
+#include "idx.h"
 #include "quote.h"
 #include "safe-read.h"
 #include "stdio--.h"
@@ -344,8 +345,6 @@ static size_t
 record_line_starts (struct buffer_record *b)
 {
   char *line_start;		/* Start of current line. */
-  char *line_end;		/* End of each line found. */
-  size_t bytes_left;		/* Length of incomplete last line. */
   size_t lines;			/* Number of lines found. */
   size_t line_length;		/* Length of each line found. */
 
@@ -354,21 +353,22 @@ record_line_starts (struct buffer_record *b)
 
   lines = 0;
   line_start = b->buffer;
-  bytes_left = b->bytes_used;
+  char *buffer_end = line_start + b->bytes_used;
+  *buffer_end = '\n';
 
   while (true)
     {
-      line_end = memchr (line_start, '\n', bytes_left);
-      if (line_end == NULL)
+      char *line_end = rawmemchr (line_start, '\n');
+      if (line_end == buffer_end)
         break;
       line_length = line_end - line_start + 1;
       keep_new_line (b, line_start, line_length);
-      bytes_left -= line_length;
       line_start = line_end + 1;
       lines++;
     }
 
   /* Check for an incomplete last line. */
+  idx_t bytes_left = buffer_end - line_start;
   if (bytes_left)
     {
       if (have_read_eof)
@@ -492,9 +492,10 @@ load_buffer (void)
     return false;
 
   /* We must make the buffer at least as large as the amount of data
-     in the partial line left over from the last call. */
-  if (bytes_wanted < hold_count)
-    bytes_wanted = hold_count;
+     in the partial line left over from the last call,
+     plus room for a sentinel '\n'. */
+  if (bytes_wanted <= hold_count)
+    bytes_wanted = hold_count + 1;
 
   while (true)
     {
@@ -512,7 +513,7 @@ load_buffer (void)
           hold_count = 0;
         }
 
-      b->bytes_used += read_input (p, bytes_avail);
+      b->bytes_used += read_input (p, bytes_avail - 1);
 
       lines_found = record_line_starts (b);
 
diff --git a/src/head.c b/src/head.c
index 7b2a440418..04d0cd8aa9 100644
--- a/src/head.c
+++ b/src/head.c
@@ -500,7 +500,7 @@ elide_tail_lines_pipe (char const *filename, int fd, uintmax_t n_elide,
 {
   struct linebuffer
   {
-    char buffer[BUFSIZ];
+    char buffer[BUFSIZ + 1];
     size_t nbytes;
     size_t nlines;
     struct linebuffer *next;
@@ -539,9 +539,10 @@ elide_tail_lines_pipe (char const *filename, int fd, uintmax_t n_elide,
 
       /* Count the number of newlines just read.  */
       {
-        char const *buffer_end = tmp->buffer + n_read;
+        char *buffer_end = tmp->buffer + n_read;
+        *buffer_end = line_end;
         char const *p = tmp->buffer;
-        while ((p = memchr (p, line_end, buffer_end - p)))
+        while ((p = rawmemchr (p, line_end)) < buffer_end)
           {
             ++p;
             ++tmp->nlines;
diff --git a/src/shuf.c b/src/shuf.c
index 1af1b533ad..553b293503 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -134,13 +134,13 @@ input_from_argv (char **operand, int n_operands, char eolbyte)
   operand[n_operands] = p;
 }
 
-/* Return the start of the next line after LINE.  The current line
-   ends in EOLBYTE, and is guaranteed to end before LINE + N.  */
+/* Return the start of the next line after LINE, which is guaranteed
+   to end in EOLBYTE.  */
 
 static char *
-next_line (char *line, char eolbyte, size_t n)
+next_line (char *line, char eolbyte)
 {
-  char *p = memchr (line, eolbyte, n);
+  char *p = rawmemchr (line, eolbyte);
   return p + 1;
 }
 
@@ -284,14 +284,14 @@ read_input (FILE *in, char eolbyte, char ***pline)
   lim = buf + used;
 
   n_lines = 0;
-  for (p = buf; p < lim; p = next_line (p, eolbyte, lim - p))
+  for (p = buf; p < lim; p = next_line (p, eolbyte))
     n_lines++;
 
   *pline = line = xnmalloc (n_lines + 1, sizeof *line);
 
   line[0] = p = buf;
   for (size_t i = 1; i <= n_lines; i++)
-    line[i] = p = next_line (p, eolbyte, lim - p);
+    line[i] = p = next_line (p, eolbyte);
 
   return n_lines;
 }
diff --git a/src/split.c b/src/split.c
index 6062f052a1..4b1b144d06 100644
--- a/src/split.c
+++ b/src/split.c
@@ -716,7 +716,7 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
       *eob = eolchar;
       while (true)
         {
-          bp = memchr (bp, eolchar, eob - bp + 1);
+          bp = rawmemchr (bp, eolchar);
           if (bp == eob)
             {
               if (eob != bp_out) /* do not write 0 bytes! */
diff --git a/src/tail.c b/src/tail.c
index 99977afa7b..eb15b933f3 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -713,8 +713,7 @@ pipe_lines (char const *pretty_filename, int fd, uintmax_t n_lines,
         size_t j;
         for (j = total_lines - n_lines; j; --j)
           {
-            beg = memchr (beg, line_end, buffer_end - beg);
-            assert (beg);
+            beg = rawmemchr (beg, line_end);
             ++beg;
           }
       }
diff --git a/src/wc.c b/src/wc.c
index bdb51928d8..ec2a4e1841 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -319,8 +319,9 @@ wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
         }
       else
         {
-          /* memchr is more efficient with longer lines.  */
-          while ((p = memchr (p, '\n', end - p)))
+          /* rawmemchr is more efficient with longer lines.  */
+          *end = '\n';
+          while ((p = rawmemchr (p, '\n')) < end)
             {
               ++p;
               ++lines;