/* Thousands separator; if outside char range, there is no separator. */
static int thousands_sep;
+/* We currently ignore multi-byte grouping chars. */
+static bool thousands_sep_ignored;
/* Nonzero if the corresponding locales are hard. */
static bool hard_LC_COLLATE;
struct keyfield const *key;
struct keyfield ugkey = *gkey;
unsigned long keynum = 1;
+ bool basic_numeric_field = false;
+ bool general_numeric_field = false;
+ bool basic_numeric_field_span = false;
+ bool general_numeric_field_span = false;
for (key = keylist; key; key = key->next, keynum++)
{
+ if (key_numeric (key))
+ {
+ if (key->general_numeric)
+ general_numeric_field = true;
+ else
+ basic_numeric_field = true;
+ }
+
if (key->traditional_used)
{
size_t sword = key->sword;
if (!sword)
sword++;
if (!eword || sword < eword)
- error (0, 0, _("key %lu is numeric and spans multiple fields"),
- keynum);
+ {
+ error (0, 0, _("key %lu is numeric and spans multiple fields"),
+ keynum);
+ if (key->general_numeric)
+ general_numeric_field_span = true;
+ else
+ basic_numeric_field_span = true;
+ }
}
/* Flag global options not copied or specified in any key. */
ugkey.reverse &= !key->reverse;
}
+ /* Explicitly warn if field delimiters in this locale
+ don't constrain numbers. */
+ bool number_locale_warned = false;
+ if (basic_numeric_field_span)
+ {
+ if (tab == TAB_DEFAULT
+ ? thousands_sep != NON_CHAR && (isblank (to_uchar (thousands_sep)))
+ : tab == thousands_sep)
+ {
+ error (0, 0,
+ _("field separator %s is treated as a "
+ "group separator in numbers"),
+ quote (((char []) {thousands_sep, 0})));
+ number_locale_warned = true;
+ }
+ }
+ if (basic_numeric_field_span || general_numeric_field_span)
+ {
+ if (tab == TAB_DEFAULT
+ ? thousands_sep != NON_CHAR && (isblank (to_uchar (decimal_point)))
+ : tab == decimal_point)
+ {
+ error (0, 0,
+ _("field separator %s is treated as a "
+ "decimal point in numbers"),
+ quote (((char []) {decimal_point, 0})));
+ number_locale_warned = true;
+ }
+ else if (tab == '-')
+ {
+ error (0, 0,
+ _("field separator %s is treated as a "
+ "minus sign in numbers"),
+ quote (((char []) {tab, 0})));
+ }
+ else if (general_numeric_field_span && tab == '+')
+ {
+ error (0, 0,
+ _("field separator %s is treated as a "
+ "plus sign in numbers"),
+ quote (((char []) {tab, 0})));
+ }
+ }
+
+ /* Explicitly indicate the decimal point used in this locale,
+ as it suggests that robust scripts need to consider
+ setting the locale when comparing numbers. */
+ if ((basic_numeric_field || general_numeric_field) && ! number_locale_warned)
+ {
+ error (0, 0,
+ _("%snumbers use %s as a decimal point in this locale"),
+ tab == decimal_point ? "" : _("note "),
+ quote (((char []) {decimal_point, 0})));
+
+ }
+
+ if (basic_numeric_field && thousands_sep_ignored)
+ {
+ error (0, 0,
+ _("the multi-byte number group separator "
+ "in this locale is not supported"));
+ }
+
/* Warn about ignored global options flagged above.
This clears all flags if UGKEY is the only one in the list. */
if (!default_key_compare (&ugkey)
/* FIXME: add support for multibyte thousands separators. */
thousands_sep = locale->thousands_sep[0];
+ if (thousands_sep && locale->thousands_sep[1])
+ thousands_sep_ignored = true;
if (! thousands_sep || locale->thousands_sep[1])
thousands_sep = NON_CHAR;
}
2
sort: text ordering performed using simple byte comparison
sort: key 1 has zero width and will be ignored
+sort: note numbers use '.' as a decimal point in this locale
3
sort: text ordering performed using simple byte comparison
sort: key 1 is numeric and spans multiple fields
+sort: note numbers use '.' as a decimal point in this locale
4
sort: text ordering performed using simple byte comparison
+sort: note numbers use '.' as a decimal point in this locale
sort: options '-bghMRrV' are ignored
5
sort: text ordering performed using simple byte comparison
+sort: note numbers use '.' as a decimal point in this locale
sort: options '-bghMRV' are ignored
sort: option '-r' only applies to last-resort comparison
6
sort: text ordering performed using simple byte comparison
+sort: note numbers use '.' as a decimal point in this locale
sort: option '-r' only applies to last-resort comparison
7
sort: text ordering performed using simple byte comparison
sort: leading blanks are significant in key 2; consider also specifying 'b'
+sort: note numbers use '.' as a decimal point in this locale
sort: options '-bg' are ignored
8
sort: text ordering performed using simple byte comparison
+sort: note numbers use '.' as a decimal point in this locale
9
sort: text ordering performed using simple byte comparison
+sort: note numbers use '.' as a decimal point in this locale
sort: option '-b' is ignored
10
sort: text ordering performed using simple byte comparison
+sort: note numbers use '.' as a decimal point in this locale
11
sort: text ordering performed using simple byte comparison
sort: leading blanks are significant in key 1; consider also specifying 'b'
EOF
echo 1 >> out
-sort -s -k2,1 --debug /dev/null 2>>out
+sort -s -k2,1 --debug /dev/null 2>>out || fail=1
echo 2 >> out
-sort -s -k2,1n --debug /dev/null 2>>out
+sort -s -k2,1n --debug /dev/null 2>>out || fail=1
echo 3 >> out
-sort -s -k1,2n --debug /dev/null 2>>out
+sort -s -k1,2n --debug /dev/null 2>>out || fail=1
echo 4 >> out
-sort -s -rRVMhgb -k1,1n --debug /dev/null 2>>out
+sort -s -rRVMhgb -k1,1n --debug /dev/null 2>>out || fail=1
echo 5 >> out
-sort -rRVMhgb -k1,1n --debug /dev/null 2>>out
+sort -rRVMhgb -k1,1n --debug /dev/null 2>>out || fail=1
echo 6 >> out
-sort -r -k1,1n --debug /dev/null 2>>out
+sort -r -k1,1n --debug /dev/null 2>>out || fail=1
echo 7 >> out
-sort -gbr -k1,1n -k1,1r --debug /dev/null 2>>out
+sort -gbr -k1,1n -k1,1r --debug /dev/null 2>>out || fail=1
echo 8 >> out
-sort -b -k1b,1bn --debug /dev/null 2>>out # no warning
+sort -b -k1b,1bn --debug /dev/null 2>>out || fail=1 # no warning
echo 9 >> out
-sort -b -k1,1bn --debug /dev/null 2>>out
+sort -b -k1,1bn --debug /dev/null 2>>out || fail=1
echo 10 >> out
-sort -b -k1,1bn -k2b,2 --debug /dev/null 2>>out # no warning
+sort -b -k1,1bn -k2b,2 --debug /dev/null 2>>out || fail=1 # no warning
echo 11 >> out
-sort -r -k1,1r --debug /dev/null 2>>out # no warning for redundant options
+sort -r -k1,1r --debug /dev/null 2>>out || fail=1 # no warning for redundant opt
echo 12 >> out
-sort -i -k1,1i --debug /dev/null 2>>out # no warning
+sort -i -k1,1i --debug /dev/null 2>>out || fail=1 # no warning
echo 13 >> out
-sort -d -k1,1b --debug /dev/null 2>>out
+sort -d -k1,1b --debug /dev/null 2>>out || fail=1
echo 14 >> out
-sort -i -k1,1d --debug /dev/null 2>>out
+sort -i -k1,1d --debug /dev/null 2>>out || fail=1
echo 15 >> out
-sort -r --debug /dev/null 2>>out #no warning
+sort -r --debug /dev/null 2>>out || fail=1 #no warning
echo 16 >> out
-sort -rM --debug /dev/null 2>>out #no warning
+sort -rM --debug /dev/null 2>>out || fail=1 #no warning
echo 17 >> out
-sort -rM -k1,1 --debug /dev/null 2>>out #no warning
-
+sort -rM -k1,1 --debug /dev/null 2>>out || fail=1 #no warning
compare exp out || fail=1
sort: failed to set locale
sort: text ordering performed using simple byte comparison
EOF
-
-LC_ALL=missing sort --debug /dev/null 2>out
-
+LC_ALL=missing sort --debug /dev/null 2>out || fail=1
# musl libc maps unknown locales to the default utf8 locale
# with no way to determine failures. This is discussed at:
# http://www.openwall.com/lists/musl/2016/04/02/1
sort: key 1 is numeric and spans multiple fields
sort: obsolescent key '+2 -1' used; consider '-k 3,1' instead
sort: key 2 has zero width and will be ignored
+sort: note numbers use '.' as a decimal point in this locale
sort: option '-b' is ignored
sort: option '-r' only applies to last-resort comparison
EOF
+sort --debug -rb -k2n +2.2 -1b /dev/null 2>out || fail=1
+compare exp out || fail=1
-sort --debug -rb -k2n +2.2 -1b /dev/null 2>out
+# check sign, decimal point, and grouping character warnings
+cat <<\EOF > exp
+sort: text ordering performed using simple byte comparison
+sort: key 1 is numeric and spans multiple fields
+sort: field separator ',' is treated as a group separator in numbers
+EOF
+if test $(printf '0,9\n0,8\n' | sort -ns | tail -n1) = '0,9'; then
+ # thousands_sep == ,
+ sort -nk1 -t, --debug /dev/null 2>out || fail=1
+ compare exp out || fail=1
+fi
+
+cat <<\EOF > exp
+sort: text ordering performed using simple byte comparison
+sort: key 1 is numeric and spans multiple fields
+sort: field separator '.' is treated as a decimal point in numbers
+EOF
+sort -nk1 -t. --debug /dev/null 2>out || fail=1
+compare exp out || fail=1
+
+cat <<\EOF > exp
+sort: text ordering performed using simple byte comparison
+sort: key 1 is numeric and spans multiple fields
+sort: field separator '-' is treated as a minus sign in numbers
+sort: note numbers use '.' as a decimal point in this locale
+EOF
+sort -nk1 -t- --debug /dev/null 2>out || fail=1
+compare exp out || fail=1
+
+cat <<\EOF > exp
+sort: text ordering performed using simple byte comparison
+sort: key 1 is numeric and spans multiple fields
+sort: field separator '+' is treated as a plus sign in numbers
+sort: note numbers use '.' as a decimal point in this locale
+EOF
+sort -gk1 -t+ --debug /dev/null 2>out || fail=1
compare exp out || fail=1
Exit $fail