From: Paul Eggert Date: Sun, 10 Oct 2021 22:59:56 +0000 (-0700) Subject: sort: fix unlikely bug when '\377' < 0 X-Git-Tag: v9.1~204 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6cafb122fa214baa96a67fbbd2dab6c77a961e0a;p=thirdparty%2Fcoreutils.git sort: fix unlikely bug when '\377' < 0 * gl/lib/strintcmp.c (strintcmp): Don’t assume that the input cannot contain ((char) -1), as this equals '\377' when char is signed (assuming 8-bit char). * src/sort.c (decimal_point): Now char, to make it clear that it’s always in char range now. (NON_CHAR): New constant. (traverse_raw_number): Return char not unsigned char; this is simpler and could be faster. All callers changed. (main): Do not convert decimal_point and thousands_sep to unsigned char, as this can mishandle comparisons on machines where char is signed and the input data contains ((char) -1). Use NON_CHAR, not -1, as an out-of-range value for thousands_sep. --- diff --git a/gl/lib/strintcmp.c b/gl/lib/strintcmp.c index 0ecf6d02c6..215158627f 100644 --- a/gl/lib/strintcmp.c +++ b/gl/lib/strintcmp.c @@ -21,6 +21,8 @@ #include "strnumcmp-in.h" +#include + /* Compare strings A and B as integers without explicitly converting them to machine numbers, to avoid overflow problems and perhaps improve performance. */ @@ -28,5 +30,5 @@ int strintcmp (char const *a, char const *b) { - return numcompare (a, b, -1, -1); + return numcompare (a, b, CHAR_MAX + 1, CHAR_MAX + 1); } diff --git a/gl/lib/strnumcmp-in.h b/gl/lib/strnumcmp-in.h index 7f2d4bbe6a..f1b03689c5 100644 --- a/gl/lib/strnumcmp-in.h +++ b/gl/lib/strnumcmp-in.h @@ -106,8 +106,8 @@ fraccompare (char const *a, char const *b, char decimal_point) /* Compare strings A and B as numbers without explicitly converting them to machine numbers, to avoid overflow problems and perhaps improve performance. DECIMAL_POINT is the decimal point and - THOUSANDS_SEP the thousands separator. A DECIMAL_POINT of -1 - causes comparisons to act as if there is no decimal point + THOUSANDS_SEP the thousands separator. A DECIMAL_POINT outside + 'char' range causes comparisons to act as if there is no decimal point character, and likewise for THOUSANDS_SEP. */ static inline int _GL_ATTRIBUTE_PURE diff --git a/src/sort.c b/src/sort.c index 2979400e76..d32dcfb02a 100644 --- a/src/sort.c +++ b/src/sort.c @@ -152,9 +152,9 @@ enum }; /* The representation of the decimal point in the current locale. */ -static int decimal_point; +static char decimal_point; -/* Thousands separator; if -1, then there isn't one. */ +/* Thousands separator; if outside char range, there is no separator. */ static int thousands_sep; /* Nonzero if the corresponding locales are hard. */ @@ -338,6 +338,9 @@ static bool reverse; they were read if all keys compare equal. */ static bool stable; +/* An int value outside char range. */ +enum { NON_CHAR = CHAR_MAX + 1 }; + /* If TAB has this value, blanks separate fields. */ enum { TAB_DEFAULT = CHAR_MAX + 1 }; @@ -1900,12 +1903,12 @@ static char const unit_order[UCHAR_LIM] = decimal_point chars only. Returns the highest digit found in the number, or '\0' if no digit has been found. Upon return *number points at the character that immediately follows after the given number. */ -static unsigned char +static char traverse_raw_number (char const **number) { char const *p = *number; - unsigned char ch; - unsigned char max_digit = '\0'; + char ch; + char max_digit = '\0'; bool ends_with_thousands_sep = false; /* Scan to end of number. @@ -1953,7 +1956,7 @@ find_unit_order (char const *number) { bool minus_sign = (*number == '-'); char const *p = number + minus_sign; - unsigned char max_digit = traverse_raw_number (&p); + char max_digit = traverse_raw_number (&p); if ('0' < max_digit) { unsigned char ch = *p; @@ -2334,7 +2337,7 @@ debug_key (struct line const *line, struct keyfield const *key) else if (key->numeric || key->human_numeric) { char const *p = beg + (beg < lim && *beg == '-'); - unsigned char max_digit = traverse_raw_number (&p); + char max_digit = traverse_raw_number (&p); if ('0' <= max_digit) { unsigned char ch = *p; @@ -4229,14 +4232,14 @@ main (int argc, char **argv) /* If the locale doesn't define a decimal point, or if the decimal point is multibyte, use the C locale's decimal point. FIXME: add support for multibyte decimal points. */ - decimal_point = to_uchar (locale->decimal_point[0]); + decimal_point = locale->decimal_point[0]; if (! decimal_point || locale->decimal_point[1]) decimal_point = '.'; /* FIXME: add support for multibyte thousands separators. */ - thousands_sep = to_uchar (*locale->thousands_sep); + thousands_sep = locale->thousands_sep[0]; if (! thousands_sep || locale->thousands_sep[1]) - thousands_sep = -1; + thousands_sep = NON_CHAR; } have_read_stdin = false;