]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgo/go/exp/norm/normregtest.go
libgo: Update to current sources.
[thirdparty/gcc.git] / libgo / go / exp / norm / normregtest.go
CommitLineData
d8f41257
ILT
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
cbb6491d
ILT
5// +build ignore
6
d8f41257
ILT
7package main
8
9import (
10 "bufio"
11 "bytes"
12 "exp/norm"
13 "flag"
14 "fmt"
2fd401c8 15 "io"
d8f41257 16 "log"
9c63abc9 17 "net/http"
d8f41257
ILT
18 "os"
19 "path"
20 "regexp"
21 "runtime"
d8f41257 22 "strconv"
506cf9aa 23 "strings"
d8f41257 24 "time"
4ccad563 25 "unicode"
9c63abc9 26 "unicode/utf8"
d8f41257
ILT
27)
28
29func main() {
30 flag.Parse()
31 loadTestData()
32 CharacterByCharacterTests()
33 StandardTests()
34 PerformanceTest()
35 if errorCount == 0 {
36 fmt.Println("PASS")
37 }
38}
39
// file is the name of the Unicode conformance data file that is loaded.
const file = "NormalizationTest.txt"

// url is the location the test data is fetched from. By default it points
// at the copy of the data file published for unicode.Version.
var url = flag.String("url",
	"http://www.unicode.org/Public/"+unicode.Version+"/ucd/"+file,
	"URL of Unicode database directory")

// localFiles, when set, makes loadTestData read the data file from the
// current working directory instead of fetching it from *url.
var localFiles = flag.Bool("local",
	false,
	"data files have been copied to the current directory; for debugging only")

// logger writes diagnostics to standard error, prefixed with the source
// file name and line number of the call site.
var logger = log.New(os.Stderr, "", log.Lshortfile)
50
// This regression test runs the test set in NormalizationTest.txt
// (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
//
// NormalizationTest.txt has the form:
// @Part0 # Specific cases
// #
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
//
// Each test has 5 columns (c1, c2, c3, c4, c5), where
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
//
// CONFORMANCE:
// 1. The following invariants must be true for all conformant implementations
//
//    NFC
//      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
//      c4 ==  NFC(c4) ==  NFC(c5)
//
//    NFD
//      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
//      c5 ==  NFD(c4) ==  NFD(c5)
//
//    NFKC
//      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
//
//    NFKD
//      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
//
// 2. For every code point X assigned in this version of Unicode that is not
//    specifically listed in Part 1, the following invariants must be true
//    for all conformant implementations:
//
//      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
//
86
// Column types. The constants follow the column order of a test line:
// the raw string first, then its NFC, NFD, NFKC and NFKD forms.
const (
	cRaw = iota
	cNFC
	cNFD
	cNFKC
	cNFKD
	// cMaxColumns is the number of columns; it sizes Test.cols.
	cMaxColumns
)
96
97// Holds data from NormalizationTest.txt
98var part []Part
99
100type Part struct {
101 name string
102 number int
103 tests []Test
104}
105
106type Test struct {
107 name string
108 partnr int
109 number int
506cf9aa 110 r rune // used for character by character test
d8f41257
ILT
111 cols [cMaxColumns]string // Each has 5 entries, see below.
112}
113
114func (t Test) Name() string {
115 if t.number < 0 {
116 return part[t.partnr].name
117 }
118 return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
119}
120
// partRe matches a newline-terminated section header such as
// "@Part0 # Specific cases\n", capturing the part digit and the title.
var partRe = regexp.MustCompile(`@Part(\d) # (.*)\n$`)

// testRe matches a test line: five ';'-terminated columns made of digits,
// the letters A-F and spaces, followed by " # " and a comment. The trailing
// newline is optional.
var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)\n?$`)

// counter is the number given to the next test read by loadTestData.
var counter int
125
126// Load the data form NormalizationTest.txt
127func loadTestData() {
128 if *localFiles {
129 pwd, _ := os.Getwd()
130 *url = "file://" + path.Join(pwd, file)
131 }
132 t := &http.Transport{}
133 t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
134 c := &http.Client{Transport: t}
135 resp, err := c.Get(*url)
136 if err != nil {
137 logger.Fatal(err)
138 }
139 if resp.StatusCode != 200 {
140 logger.Fatal("bad GET status for "+file, resp.Status)
141 }
142 f := resp.Body
143 defer f.Close()
144 input := bufio.NewReader(f)
145 for {
146 line, err := input.ReadString('\n')
147 if err != nil {
2fd401c8 148 if err == io.EOF {
d8f41257
ILT
149 break
150 }
151 logger.Fatal(err)
152 }
153 if len(line) == 0 || line[0] == '#' {
154 continue
155 }
156 m := partRe.FindStringSubmatch(line)
157 if m != nil {
158 if len(m) < 3 {
159 logger.Fatal("Failed to parse Part: ", line)
160 }
161 i, err := strconv.Atoi(m[1])
162 if err != nil {
163 logger.Fatal(err)
164 }
165 name := m[2]
166 part = append(part, Part{name: name[:len(name)-1], number: i})
167 continue
168 }
169 m = testRe.FindStringSubmatch(line)
170 if m == nil || len(m) < 7 {
171 logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
172 }
173 test := Test{name: m[6], partnr: len(part) - 1, number: counter}
174 counter++
175 for j := 1; j < len(m)-1; j++ {
176 for _, split := range strings.Split(m[j], " ") {
d5363590 177 r, err := strconv.ParseUint(split, 16, 64)
d8f41257
ILT
178 if err != nil {
179 logger.Fatal(err)
180 }
506cf9aa 181 if test.r == 0 {
d8f41257 182 // save for CharacterByCharacterTests
9a0e3259 183 test.r = rune(r)
d8f41257
ILT
184 }
185 var buf [utf8.UTFMax]byte
506cf9aa 186 sz := utf8.EncodeRune(buf[:], rune(r))
d8f41257
ILT
187 test.cols[j-1] += string(buf[:sz])
188 }
189 }
190 part := &part[len(part)-1]
191 part.tests = append(part.tests, test)
192 }
193}
194
// fstr holds the names of the normalization forms used in log messages;
// cmpResult and cmpIsNormal index it with a norm.Form value.
var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}

// errorCount is the number of failed checks recorded so far.
var errorCount int
198
199func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
200 if gold != result {
201 errorCount++
202 if errorCount > 20 {
203 return
204 }
506cf9aa 205 st, sr, sg := []rune(test), []rune(result), []rune(gold)
d8f41257
ILT
206 logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
207 t.Name(), name, fstr[f], st, sr, sg, t.name)
208 }
209}
210
211func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) {
212 if result != want {
213 errorCount++
214 if errorCount > 20 {
215 return
216 }
506cf9aa 217 logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
d8f41257
ILT
218 }
219}
220
221func doTest(t *Test, f norm.Form, gold, test string) {
222 result := f.Bytes([]byte(test))
223 cmpResult(t, "Bytes", f, gold, test, string(result))
501699af
ILT
224 sresult := f.String(test)
225 cmpResult(t, "String", f, gold, test, sresult)
226 buf := make([]byte, norm.MaxSegmentSize)
227 acc := []byte{}
228 i := norm.Iter{}
229 i.SetInputString(f, test)
230 for !i.Done() {
231 n := i.Next(buf)
232 acc = append(acc, buf[:n]...)
233 }
234 cmpResult(t, "Iter.Next", f, gold, test, string(acc))
d8f41257
ILT
235 for i := range test {
236 out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
237 cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
238 }
239 cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
240}
241
242func doConformanceTests(t *Test, partn int) {
243 for i := 0; i <= 2; i++ {
244 doTest(t, norm.NFC, t.cols[1], t.cols[i])
245 doTest(t, norm.NFD, t.cols[2], t.cols[i])
246 doTest(t, norm.NFKC, t.cols[3], t.cols[i])
247 doTest(t, norm.NFKD, t.cols[4], t.cols[i])
248 }
249 for i := 3; i <= 4; i++ {
250 doTest(t, norm.NFC, t.cols[3], t.cols[i])
251 doTest(t, norm.NFD, t.cols[4], t.cols[i])
252 doTest(t, norm.NFKC, t.cols[3], t.cols[i])
253 doTest(t, norm.NFKD, t.cols[4], t.cols[i])
254 }
255}
256
257func CharacterByCharacterTests() {
258 tests := part[1].tests
9a0e3259 259 var last rune = 0
d8f41257 260 for i := 0; i <= len(tests); i++ { // last one is special case
9a0e3259 261 var r rune
d8f41257 262 if i == len(tests) {
506cf9aa 263 r = 0x2FA1E // Don't have to go to 0x10FFFF
d8f41257 264 } else {
506cf9aa 265 r = tests[i].r
d8f41257 266 }
506cf9aa 267 for last++; last < r; last++ {
d8f41257
ILT
268 // Check all characters that were not explicitly listed in the test.
269 t := &Test{partnr: 1, number: -1}
270 char := string(last)
271 doTest(t, norm.NFC, char, char)
272 doTest(t, norm.NFD, char, char)
273 doTest(t, norm.NFKC, char, char)
274 doTest(t, norm.NFKD, char, char)
275 }
276 if i < len(tests) {
277 doConformanceTests(&tests[i], 1)
278 }
279 }
280}
281
282func StandardTests() {
283 for _, j := range []int{0, 2, 3} {
284 for _, test := range part[j].tests {
285 doConformanceTests(&test, j)
286 }
287 }
288}
289
290// PerformanceTest verifies that normalization is O(n). If any of the
291// code does not properly check for maxCombiningChars, normalization
292// may exhibit O(n**2) behavior.
293func PerformanceTest() {
294 runtime.GOMAXPROCS(2)
295 success := make(chan bool, 1)
296 go func() {
297 buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
298 buf = append(buf, "\u035B"...)
299 norm.NFC.Append(nil, buf...)
300 success <- true
301 }()
9a0e3259 302 timeout := time.After(1 * time.Second)
d8f41257
ILT
303 select {
304 case <-success:
305 // test completed before the timeout
306 case <-timeout:
307 errorCount++
308 logger.Printf(`unexpectedly long time to complete PerformanceTest`)
309 }
310}