From: Tobias Stoeckmann Date: Mon, 26 Mar 2018 11:37:12 +0000 (+0200) Subject: cut: improve large file support on 32 bit X-Git-Tag: v8.30~46 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d1a754c827251d224ae5b7dda7f8d8c26d5940b5;p=thirdparty%2Fcoreutils.git cut: improve large file support on 32 bit Increase max range from SIZE_MAX to UINTMAX_MAX, which will allow cut to support line lengths up to the max file size on all systems. The inherent SIZE_MAX limitation in cut was removed with the enhancements in https://bugs.gnu.org/13127. Also numfmt gets similarly increased --field ranges due to shared code. * src/cut.c: s/size_t/uintmax_t/. * src/numfmt.c: Likewise. * src/set-fields.c: Likewise. * src/set-fields.h: Likewise. * tests/misc/cut-huge-range.sh: Adjust accordingly. * tests/misc/numfmt.pl: Likewise. * NEWS: Mention the improvement. --- diff --git a/NEWS b/NEWS index 207e785b3d..34652b59d4 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,9 @@ GNU coreutils NEWS -*- outline -*- ** Improvements + cut supports line lengths up to the max file size on 32 bit systems. + Previously only offsets up to SIZE_MAX-1 were supported. + stat and tail now know about the "exfs" file system, which is a version of XFS. stat -f --format=%T now reports the file system type, and tail -f uses inotify. diff --git a/src/cut.c b/src/cut.c index be2e67af4f..5dc511af3d 100644 --- a/src/cut.c +++ b/src/cut.c @@ -204,7 +204,7 @@ Each range is one of:\n\ and if required CURRENT_RP. */ static inline void -next_item (size_t *item_idx) +next_item (uintmax_t *item_idx) { (*item_idx)++; if ((*item_idx) > current_rp->hi) @@ -214,7 +214,7 @@ next_item (size_t *item_idx) /* Return nonzero if the K'th field or byte is printable. */ static inline bool -print_kth (size_t k) +print_kth (uintmax_t k) { return current_rp->lo <= k; } @@ -222,7 +222,7 @@ print_kth (size_t k) /* Return nonzero if K'th byte is the beginning of a range. */ static inline bool -is_range_start_index (size_t k) +is_range_start_index (uintmax_t k) { return k == current_rp->lo; } @@ -232,7 +232,7 @@ is_range_start_index (size_t k) static void cut_bytes (FILE *stream) { - size_t byte_idx; /* Number of bytes in the line so far. */ + uintmax_t byte_idx; /* Number of bytes in the line so far. */ /* Whether to begin printing delimiters between ranges for the current line. Set after we've begun printing data corresponding to the first range. */ bool print_delimiter; @@ -286,7 +286,7 @@ static void cut_fields (FILE *stream) { int c; - size_t field_idx = 1; + uintmax_t field_idx = 1; bool found_any_selected_field = false; bool buffer_first_field; diff --git a/src/numfmt.c b/src/numfmt.c index 130e0388d7..ce5c131e45 100644 --- a/src/numfmt.c +++ b/src/numfmt.c @@ -1351,13 +1351,13 @@ next_field (char **line) } static bool _GL_ATTRIBUTE_PURE -include_field (size_t field) +include_field (uintmax_t field) { struct field_range_pair *p = frp; if (!p) return field == 1; - while (p->lo != SIZE_MAX) + while (p->lo != UINTMAX_MAX) { if (p->lo <= field && p->hi >= field) return true; @@ -1369,7 +1369,7 @@ include_field (size_t field) /* Convert and output the given field. If it is not included in the set of fields to process just output the original */ static bool -process_field (char *text, size_t field) +process_field (char *text, uintmax_t field) { long double val = 0; size_t precision = 0; @@ -1400,7 +1400,7 @@ static int process_line (char *line, bool newline) { char *next; - size_t field = 0; + uintmax_t field = 0; bool valid_number = true; while (true) { diff --git a/src/set-fields.c b/src/set-fields.c index 20687b2931..0119e3f998 100644 --- a/src/set-fields.c +++ b/src/set-fields.c @@ -45,7 +45,7 @@ static size_t n_frp_allocated; space if necessary. Update global variable N_FRP. When allocating, update global variable N_FRP_ALLOCATED. */ static void -add_range_pair (size_t lo, size_t hi) +add_range_pair (uintmax_t lo, uintmax_t hi) { if (n_frp == n_frp_allocated) frp = X2NREALLOC (frp, &n_frp_allocated); @@ -89,8 +89,8 @@ complement_rp (void) add_range_pair (c[i-1].hi + 1, c[i].lo - 1); } - if (c[n-1].hi < SIZE_MAX) - add_range_pair (c[n-1].hi + 1, SIZE_MAX); + if (c[n-1].hi < UINTMAX_MAX) + add_range_pair (c[n-1].hi + 1, UINTMAX_MAX); free (c); } @@ -100,7 +100,7 @@ complement_rp (void) be composed of one or more numbers or ranges of numbers, separated by blanks or commas. Incomplete ranges may be given: '-m' means '1-m'; 'n-' means 'n' through end of line. - n=0 and n>=SIZE_MAX values will trigger an error. + n=0 and n>=UINTMAX_MAX values will trigger an error. if SETFLD_ALLOW_DASH option is used, a single '-' means all fields (otherwise a single dash triggers an error). @@ -121,24 +121,24 @@ complement_rp (void) The first field is stored as 1 (zero is not used). An open-ended range (i.e., until the last field of the input line) - is indicated with hi = SIZE_MAX. + is indicated with hi = UINTMAX_MAX. - A sentinel of SIZE_MAX/SIZE_MAX is always added as the last + A sentinel of UINTMAX_MAX/UINTMAX_MAX is always added as the last field range pair. Examples: - given '1-2,4', frp = [ { .lo = 1, .hi = 2 }, - { .lo = 4, .hi = 4 }, - { .lo = SIZE_MAX, .hi = SIZE_MAX } ]; + given '1-2,4', frp = [ { .lo = 1, .hi = 2 }, + { .lo = 4, .hi = 4 }, + { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ]; - given '3-', frp = [ { .lo = 3, .hi = SIZE_MAX }, - { .lo = SIZE_MAX, .hi = SIZE_MAX } ]; + given '3-', frp = [ { .lo = 3, .hi = UINTMAX_MAX }, + { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ]; */ void set_fields (const char *fieldstr, unsigned int options) { - size_t initial = 1; /* Value of first number in a range. */ - size_t value = 0; /* If nonzero, a number being accumulated. */ + uintmax_t initial = 1; /* Value of first number in a range. */ + uintmax_t value = 0; /* If nonzero, a number being accumulated. */ bool lhs_specified = false; bool rhs_specified = false; bool dash_found = false; /* True if a '-' is found in this field. */ @@ -201,7 +201,7 @@ set_fields (const char *fieldstr, unsigned int options) if (!rhs_specified) { /* 'n-'. From 'initial' to end of line. */ - add_range_pair (initial, SIZE_MAX); + add_range_pair (initial, UINTMAX_MAX); } else { @@ -247,8 +247,8 @@ set_fields (const char *fieldstr, unsigned int options) lhs_specified = 1; /* Detect overflow. */ - if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t) - || value == SIZE_MAX) + if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', uintmax_t) + || value == UINTMAX_MAX) { /* In case the user specified -c$(echo 2^64|bc),22, complain only about the first number. */ @@ -307,7 +307,7 @@ set_fields (const char *fieldstr, unsigned int options) and for performance reasons. */ ++n_frp; frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair)); - frp[n_frp - 1].lo = frp[n_frp - 1].hi = SIZE_MAX; + frp[n_frp - 1].lo = frp[n_frp - 1].hi = UINTMAX_MAX; } void diff --git a/src/set-fields.h b/src/set-fields.h index b9bab1fd58..06f5ba8f3e 100644 --- a/src/set-fields.h +++ b/src/set-fields.h @@ -19,8 +19,8 @@ struct field_range_pair { - size_t lo; - size_t hi; + uintmax_t lo; + uintmax_t hi; }; /* Array of `struct range_pair' holding all the finite ranges. */ diff --git a/tests/misc/cut-huge-range.sh b/tests/misc/cut-huge-range.sh index e7c17c2229..76ca965e39 100755 --- a/tests/misc/cut-huge-range.sh +++ b/tests/misc/cut-huge-range.sh @@ -44,11 +44,9 @@ subtract_one=' ' # Ensure we can cut up to our sentinel value. -# This is currently SIZE_MAX, but could be raised to UINTMAX_MAX -# if we didn't allocate memory for each line as a unit. # Don't use expr to subtract one, -# since SIZE_MAX may exceed its maximum value. -CUT_MAX=$(echo $SIZE_MAX | sed "$subtract_one") +# since UINTMAX_MAX may exceed its maximum value. +CUT_MAX=$(echo $UINTMAX_MAX | sed "$subtract_one") # From coreutils-8.10 through 8.20, this would make cut try to allocate # a 256MiB bit vector. @@ -59,8 +57,10 @@ CUT_MAX=$(echo $SIZE_MAX | sed "$subtract_one") (ulimit -v $vm && cut -b1-$CUT_MAX /dev/null >> err 2>&1) || fail=1 # Explicitly disallow values above CUT_MAX -(ulimit -v $vm && returns_ 1 cut -b$SIZE_MAX /dev/null 2>/dev/null) || fail=1 -(ulimit -v $vm && returns_ 1 cut -b$SIZE_OFLOW /dev/null 2>/dev/null) || fail=1 +(ulimit -v $vm && returns_ 1 cut -b$UINTMAX_MAX /dev/null 2>/dev/null) || + fail=1 +(ulimit -v $vm && returns_ 1 cut -b$UINTMAX_OFLOW /dev/null 2>/dev/null) || + fail=1 compare /dev/null err || fail=1 diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl index 6b36233995..94a7caf9d7 100755 --- a/tests/misc/numfmt.pl +++ b/tests/misc/numfmt.pl @@ -308,9 +308,9 @@ my @Tests = {EXIT=>1}, {ERR=>"$prog: invalid field range\n$try"}], ['field-range-err-12','--field 0-1 --to=si 10', {EXIT=>1}, {ERR=>"$prog: fields are numbered from 1\n$try"}], - ['field-range-err-13','--field '.$limits->{SIZE_MAX}.',22 --to=si 10', + ['field-range-err-13','--field '.$limits->{UINTMAX_MAX}.',22 --to=si 10', {EXIT=>1}, {ERR=>"$prog: field number " . - "'".$limits->{SIZE_MAX}."' is too large\n$try"}], + "'".$limits->{UINTMAX_MAX}."' is too large\n$try"}], # Auto-consume white-space, setup auto-padding ['whitespace-1', '--to=si --field 2 "A 500 B"', {OUT=>"A 500 B"}],