From: Paul Eggert Date: Fri, 8 Nov 2024 08:01:01 +0000 (-0800) Subject: shuf: prefer xpalloc to xnrealloc X-Git-Tag: v9.6~91 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cd340ab21f6ed12506d8c5d85a6b2a7cf4a3c310;p=thirdparty%2Fcoreutils.git shuf: prefer xpalloc to xnrealloc * src/shuf.c (RESERVOIR_LINES_INCREMENT): Remove. All uses removed. (read_input_reservoir_sampling, main): Prefer idx_t to size_t for sizes related to xpalloc. (read_input_reservoir_sampling): Prefer xpalloc to xnrealloc. --- diff --git a/src/shuf.c b/src/shuf.c index 63c26f31ee..0956abf536 100644 --- a/src/shuf.c +++ b/src/shuf.c @@ -37,9 +37,6 @@ #define AUTHORS proper_name ("Paul Eggert") -/* For reservoir-sampling, allocate the reservoir lines in batches. */ -enum { RESERVOIR_LINES_INCREMENT = 1024 }; - /* reservoir-sampling introduces CPU overhead for small inputs. So only enable it for inputs >= this limit. This limit was determined using these commands: @@ -168,33 +165,30 @@ input_size (void) /* Read all lines and store up to K permuted lines in *OUT_RSRV. Return the number of lines read, up to a maximum of K. */ -static size_t -read_input_reservoir_sampling (FILE *in, char eolbyte, size_t k, +static idx_t +read_input_reservoir_sampling (FILE *in, char eolbyte, idx_t k, struct randint_source *s, struct linebuffer **out_rsrv) { randint n_lines = 0; - size_t n_alloc_lines = MIN (k, RESERVOIR_LINES_INCREMENT); + idx_t n_alloc_lines = 0; struct linebuffer *line = nullptr; - struct linebuffer *rsrv; - - rsrv = xcalloc (n_alloc_lines, sizeof (struct linebuffer)); + struct linebuffer *rsrv = nullptr; /* Fill the first K lines, directly into the reservoir. */ - while (n_lines < k - && (line = - readlinebuffer_delim (&rsrv[n_lines], in, eolbyte)) != nullptr) + for (n_lines = 0; n_lines < k; n_lines++) { - n_lines++; - - /* Enlarge reservoir. */ - if (n_lines >= n_alloc_lines) + /* Enlarge reservoir if needed. */ + if (n_lines == n_alloc_lines) { - n_alloc_lines += RESERVOIR_LINES_INCREMENT; - rsrv = xnrealloc (rsrv, n_alloc_lines, sizeof (struct linebuffer)); - memset (&rsrv[n_lines], 0, - RESERVOIR_LINES_INCREMENT * sizeof (struct linebuffer)); + idx_t old = n_alloc_lines; + rsrv = xpalloc (rsrv, &n_alloc_lines, 1, k, sizeof *rsrv); + memset (&rsrv[n_lines], 0, (n_alloc_lines - old) * sizeof *rsrv); } + + line = readlinebuffer_delim (&rsrv[n_lines], in, eolbyte); + if (!line) + break; } /* last line wasn't null - so there may be more lines to read. */ @@ -372,7 +366,7 @@ main (int argc, char **argv) bool input_range = false; size_t lo_input = SIZE_MAX; size_t hi_input = 0; - size_t head_lines = SIZE_MAX; + idx_t head_lines = MIN (IDX_MAX, SIZE_MAX); char const *outfile = nullptr; char *random_source = nullptr; char eolbyte = '\n'; @@ -523,7 +517,7 @@ main (int argc, char **argv) fadvise (stdin, FADVISE_SEQUENTIAL); - if (repeat || head_lines == SIZE_MAX + if (repeat || head_lines == MIN (IDX_MAX, SIZE_MAX) || input_size () <= RESERVOIR_MIN_INPUT) { n_lines = read_input (stdin, eolbyte, &input_lines); @@ -538,7 +532,7 @@ main (int argc, char **argv) /* The adjusted head line count; can be less than HEAD_LINES if the input is small and if not repeating. */ - size_t ahead_lines = repeat || head_lines < n_lines ? head_lines : n_lines; + idx_t ahead_lines = repeat || head_lines < n_lines ? head_lines : n_lines; randint_source = randint_all_new (random_source, (use_reservoir_sampling || repeat