From: Jim Meyering Date: Sat, 26 Apr 2008 07:28:48 +0000 (+0200) Subject: avoid problems with sign-extended "char" operand to is* functions X-Git-Tag: v6.12~68 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=209850fd7e1e89cf8937310878bd22d70e3588a5;p=thirdparty%2Fcoreutils.git avoid problems with sign-extended "char" operand to is* functions * src/cut.c (set_fields): Apply to_uchar to isblank operands. * src/uniq.c (find_field): Likewise. * src/seq.c (scan_arg): Likewise, for isspace. * tests/misc/uniq: New file. Test for the above, but only when isspace(0240). * tests/Makefile.am (TESTS): Add misc/uniq. * configure.ac: Use gt_LOCALE_FR. * tests/check.mk (TESTS_ENVIRONMENT): Propagate LOCALE_FR to scripts. * NEWS: Mention the bug fixes. Before this patch, on FreeBSD 6: $ printf 'x y z\nx \xa0 y z\n' > in $ LC_ALL=fr_FR.UTF-8 uniq -f2 in|tr ' ' . x.y.z x. .y.z With the patch: $ LC_ALL=fr_FR.UTF-8 uniq -f2 in|tr ' ' . x.y.z This also affected many other locales: for i in $(locale -a); do test $(LC_ALL=$i ./uniq -f1 in|wc -l) = $(LC_ALL=$i uniq -f1 in|wc -l) || echo $i ; done ... en_GB.ISO8859-1 en_GB.ISO8859-15 en_GB.UTF-8 en_IE.UTF-8 en_NZ.ISO8859-1 en_NZ.ISO8859-15 en_NZ.UTF-8 en_US.ISO8859-1 en_US.ISO8859-15 en_US.UTF-8 ... --- diff --git a/NEWS b/NEWS index d7ec1b2f17..8cee7f5fa7 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,11 @@ GNU coreutils NEWS -*- outline -*- Printing of such large-numbered, kernel-only (not in /etc/group) group-IDs was suppressed in 6.11 due to ignorance that they are useful. + uniq: avoid subtle field-skipping malfunction due to isblank misuse. + In some locales on some systems, isblank(240) (aka  ) is nonzero. + On such systems, uniq --skip-fields=N would fail to skip the proper + number of fields for some inputs. + tac: avoid segfault with --regex (-r) and multiple files, e.g., "echo > x; tac -r x x". [bug present at least in textutils-1.8b, from 1992] diff --git a/configure.ac b/configure.ac index 6a7c5a8dff..7e5546a456 100644 --- a/configure.ac +++ b/configure.ac @@ -335,6 +335,9 @@ AC_SUBST([CONFIG_STATUS_DEPENDENCIES]) AM_GNU_GETTEXT([external], [need-formatstring-macros]) AM_GNU_GETTEXT_VERSION([0.15]) +# For a test of uniq: it uses the $LOCALE_FR envvar. +gt_LOCALE_FR + AC_CONFIG_FILES( Makefile doc/Makefile diff --git a/src/cut.c b/src/cut.c index e0ce42a6b3..f6254e45b3 100644 --- a/src/cut.c +++ b/src/cut.c @@ -1,5 +1,5 @@ /* cut - remove parts of lines of files - Copyright (C) 1997-2007 Free Software Foundation, Inc. + Copyright (C) 1997-2008 Free Software Foundation, Inc. Copyright (C) 1984 David M. Ihnat This program is free software: you can redistribute it and/or modify @@ -372,7 +372,8 @@ set_fields (const char *fieldstr) initial = (lhs_specified ? value : 1); value = 0; } - else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0') + else if (*fieldstr == ',' || + isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') { in_digits = false; /* Ending the string, or this field/byte sublist. */ diff --git a/src/seq.c b/src/seq.c index 7fc89ad5d6..efc0c7243a 100644 --- a/src/seq.c +++ b/src/seq.c @@ -145,7 +145,7 @@ scan_arg (const char *arg) } /* We don't output spaces or '+' so don't include in width */ - while (isspace (*arg) || *arg == '+') + while (isspace (to_uchar (*arg)) || *arg == '+') arg++; ret.width = strlen (arg); diff --git a/src/uniq.c b/src/uniq.c index 1a54055b30..b167bbb083 100644 --- a/src/uniq.c +++ b/src/uniq.c @@ -1,5 +1,5 @@ /* uniq -- remove duplicate lines from a sorted file - Copyright (C) 86, 91, 1995-2007 Free Software Foundation, Inc. + Copyright (C) 86, 91, 1995-2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -213,9 +213,9 @@ find_field (const struct linebuffer *line) for (count = 0; count < skip_fields && i < size; count++) { - while (i < size && isblank (lp[i])) + while (i < size && isblank (to_uchar (lp[i]))) i++; - while (i < size && !isblank (lp[i])) + while (i < size && !isblank (to_uchar (lp[i]))) i++; } diff --git a/tests/Makefile.am b/tests/Makefile.am index 343d719488..515cfe6fe1 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -211,6 +211,7 @@ TESTS = \ misc/tsort \ misc/tty-eof \ misc/unexpand \ + misc/uniq \ chmod/c-option \ chmod/equal-x \ chmod/equals \ diff --git a/tests/check.mk b/tests/check.mk index f62661ca02..abe03afcf8 100644 --- a/tests/check.mk +++ b/tests/check.mk @@ -45,6 +45,7 @@ built_programs = \ # Append this, because automake does the same. TESTS_ENVIRONMENT = \ + LOCALE_FR='$(LOCALE_FR)' \ abs_top_builddir='$(abs_top_builddir)' \ abs_top_srcdir='$(abs_top_srcdir)' \ built_programs="`$(built_programs)`" \ diff --git a/tests/misc/uniq b/tests/misc/uniq new file mode 100755 index 0000000000..6587844ce4 --- /dev/null +++ b/tests/misc/uniq @@ -0,0 +1,58 @@ +#!/bin/sh +# Test for a subtle, system-and-locale-dependent bug in uniq. + +# Copyright (C) 2008 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +case $LOCALE_FR in +''|none) echo "$0: skipping this test -- no appropriate locale" 1>&2; exit 77;; +esac + +: ${srcdir=.} +. $top_srcdir/tests/require-perl + +me=`echo $0|sed 's,.*/,,'` +exec $PERL -w -I$top_srcdir/tests -MCoreutils -M"CuTmpdir qw($me)" -- - <<\EOF +require 5.003; +use strict; + +my $prog = 'uniq'; + +# Turn off localization of executable's output. +@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + +# I've only ever triggered the problem in a non-C locale. +my $locale = $ENV{LOCALE_FR}; + +# See if isblank returns true for nbsp. +my $x = `env printf '\xa0'| LC_ALL=$locale tr '[:blank:]' x`; +# If so, expect just one line of output in the schar test. +# Otherwise, expect two. +my $in = " y z\n\xa0 y z\n"; +my $schar_exp = $x eq 'x' ? " y z\n" : $in; + +my @Tests = + ( + ['schar', '-f1', {IN => $in}, {OUT => $schar_exp}, + {ENV => "LC_ALL=$locale"}, + ], + ); + +my $save_temps = $ENV{DEBUG}; +my $verbose = $ENV{VERBOSE}; + +my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose); +exit $fail; +EOF