Fix test cases tst-fnmatch and tst-regexloc for the new iso14651_t1_common file.

author Mike FABIAN <mfabian@redhat.com>
Tue, 23 Jan 2018 16:29:36 +0000 (17:29 +0100)

committer Mike FABIAN <mfabian@redhat.com>
Tue, 27 Feb 2018 16:00:21 +0000 (17:00 +0100)
diff --git a/ChangeLog b/ChangeLog

index 4091d0717bf5819e400c0eeddf2c6c03bc97b11a..3bff7b8266bbcca1e6c4791887cf5714876390f8 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2018-02-27  Mike FABIAN  <mfabian@redhat.com>
+
+       * posix/tst-fnmatch.input: Fix results for range expressions
+       for non C locales.
+       * posix/tst-regexloc.c: Do not use a range expression for
+       de_DE.ISO-8859-1 locale.
+
  2018-02-27  Mike FABIAN  <mfabian@redhat.com>
  
         * posix/bug-regex5.c: Fix test case because with the new
diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input

index 88b3f739a59333d58f2a088769b9e9c4d7b6ea09..589fb2a94038dbe363885a52169342a989f0ff3c 100644 (file)
--- a/posix/tst-fnmatch.input
+++ b/posix/tst-fnmatch.input
@@ -418,21 +418,47 @@ C         "-"                     "[Z-\\]]"              NOMATCH
  # Following are tests outside the scope of IEEE 2003.2 since they are using
  # locales other than the C locale.  The main focus of the tests is on the
  # handling of ranges and the recognition of character (vs bytes).
+#
+# See:
+#
+# http://pubs.opengroup.org/onlinepubs/7908799/xbd/re.html
+#
+# > A range expression represents the set of collating elements that fall
+# > between two elements in the current collation sequence,
+# > inclusively. It is expressed as the starting point and the ending
+# > point separated by a hyphen (-).
+# >
+# > Range expressions must not be used in portable applications because
+# > their behaviour is dependent on the collating sequence. Ranges will be
+# > treated according to the current collating sequence, and include such
+# > characters that fall within the range based on that collating
+# > sequence, regardless of character values. This, however, means that
+# > the interpretation will differ depending on collating sequence. If,
+# > for instance, one collating sequence defines ä as a variant of a,
+# > while another defines it as a letter following z, then the expression
+# > [ä-z] is valid in the first language and invalid in the second.
+#
+# Therefore, using [a-z] does not make much sense except in the C/POSIX locale.
+# The new iso14651_t1_common lists upper case and lower case Latin characters
+# in a different order than the old one which causes surprising results
+# for example in the de_DE locale: [a-z] now includes A because A comes
+# after a in iso14651_t1_common but does not include Z because that comes
+# after z in iso14651_t1_common.
  de_DE.ISO-8859-1 "a"                   "[a-z]"                0
  de_DE.ISO-8859-1 "z"                   "[a-z]"                0
  de_DE.ISO-8859-1 "ä"                   "[a-z]"                0
  de_DE.ISO-8859-1 "ö"                   "[a-z]"                0
  de_DE.ISO-8859-1 "ü"                   "[a-z]"                0
-de_DE.ISO-8859-1 "A"                   "[a-z]"                NOMATCH
+de_DE.ISO-8859-1 "A"                   "[a-z]"                0 # surprising but correct!
  de_DE.ISO-8859-1 "Z"                   "[a-z]"                NOMATCH
-de_DE.ISO-8859-1 "Ä"                   "[a-z]"                NOMATCH
-de_DE.ISO-8859-1 "Ö"                   "[a-z]"                NOMATCH
-de_DE.ISO-8859-1 "Ü"                   "[a-z]"                NOMATCH
+de_DE.ISO-8859-1 "Ä"                   "[a-z]"                0 # surprising but correct!
+de_DE.ISO-8859-1 "Ö"                   "[a-z]"                0 # surprising but correct!
+de_DE.ISO-8859-1 "Ü"                   "[a-z]"                0 # surprising but correct!
  de_DE.ISO-8859-1 "a"                   "[A-Z]"                NOMATCH
-de_DE.ISO-8859-1 "z"                   "[A-Z]"                NOMATCH
-de_DE.ISO-8859-1 "ä"                   "[A-Z]"                NOMATCH
-de_DE.ISO-8859-1 "ö"                   "[A-Z]"                NOMATCH
-de_DE.ISO-8859-1 "ü"                   "[A-Z]"                NOMATCH
+de_DE.ISO-8859-1 "z"                   "[A-Z]"                0 # surprising but correct!
+de_DE.ISO-8859-1 "ä"                   "[A-Z]"                0 # surprising but correct!
+de_DE.ISO-8859-1 "ö"                   "[A-Z]"                0 # surprising but correct!
+de_DE.ISO-8859-1 "ü"                   "[A-Z]"                0 # surprising but correct!
  de_DE.ISO-8859-1 "A"                   "[A-Z]"                0
  de_DE.ISO-8859-1 "Z"                   "[A-Z]"                0
  de_DE.ISO-8859-1 "Ä"                   "[A-Z]"                0
@@ -515,16 +541,16 @@ de_DE.UTF-8        "z"                    "[a-z]"                0
  de_DE.UTF-8     "ä"                   "[a-z]"                0
  de_DE.UTF-8     "ö"                   "[a-z]"                0
  de_DE.UTF-8     "ü"                   "[a-z]"                0
-de_DE.UTF-8     "A"                    "[a-z]"                NOMATCH
+de_DE.UTF-8     "A"                    "[a-z]"                0 # surprising but correct!
  de_DE.UTF-8     "Z"                    "[a-z]"                NOMATCH
-de_DE.UTF-8     "Ä"                   "[a-z]"                NOMATCH
-de_DE.UTF-8     "Ö"                   "[a-z]"                NOMATCH
-de_DE.UTF-8     "Ü"                   "[a-z]"                NOMATCH
+de_DE.UTF-8     "Ä"                   "[a-z]"        0 # surprising but correct!
+de_DE.UTF-8     "Ö"                   "[a-z]"        0 # surprising but correct!
+de_DE.UTF-8     "Ü"                   "[a-z]"        0 # surprising but correct!
  de_DE.UTF-8     "a"                    "[A-Z]"                NOMATCH
-de_DE.UTF-8     "z"                    "[A-Z]"                NOMATCH
-de_DE.UTF-8     "ä"                   "[A-Z]"                NOMATCH
-de_DE.UTF-8     "ö"                   "[A-Z]"                NOMATCH
-de_DE.UTF-8     "ü"                   "[A-Z]"                NOMATCH
+de_DE.UTF-8     "z"                    "[A-Z]"                0 # surprising but correct!
+de_DE.UTF-8     "ä"                   "[A-Z]"        0 # surprising but correct!
+de_DE.UTF-8     "ö"                   "[A-Z]"        0 # surprising but correct!
+de_DE.UTF-8     "ü"                   "[A-Z]"        0 # surprising but correct!
  de_DE.UTF-8     "A"                    "[A-Z]"                0
  de_DE.UTF-8     "Z"                    "[A-Z]"                0
  de_DE.UTF-8     "Ä"                   "[A-Z]"                0
diff --git a/posix/tst-regexloc.c b/posix/tst-regexloc.c

index 60235b4d3b3e396edefdd0452e6c26a98a915740..7fbc496d0ce5117fe88c18e242e722dfc3089e86 100644 (file)
--- a/posix/tst-regexloc.c
+++ b/posix/tst-regexloc.c
@@ -29,8 +29,8 @@ do_test (void)
  
    if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
      puts ("cannot set locale");
-  else if (regcomp (&re, "[a-f]*", 0) != REG_NOERROR)
-    puts ("cannot compile expression \"[a-f]*\"");
+  else if (regcomp (&re, "[abcdef]*", 0) != REG_NOERROR)
+    puts ("cannot compile expression \"[abcdef]*\"");
    else if (regexec (&re, "abcdefCDEF", 1, mat, 0) == REG_NOMATCH)
      puts ("no match");
    else
author	Mike FABIAN <mfabian@redhat.com>
	Tue, 23 Jan 2018 16:29:36 +0000 (17:29 +0100)
committer	Mike FABIAN <mfabian@redhat.com>
	Tue, 27 Feb 2018 16:00:21 +0000 (17:00 +0100)
ChangeLog		patch | blob | blame | history
posix/tst-fnmatch.input		patch | blob | blame | history
posix/tst-regexloc.c		patch | blob | blame | history