]>
Commit | Line | Data |
---|---|---|
d8f41257 ILT |
1 | // Copyright 2011 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
cbb6491d ILT |
5 | // +build ignore |
6 | ||
d8f41257 ILT |
7 | package main |
8 | ||
9 | import ( | |
10 | "bufio" | |
11 | "bytes" | |
12 | "exp/norm" | |
13 | "flag" | |
14 | "fmt" | |
2fd401c8 | 15 | "io" |
d8f41257 | 16 | "log" |
9c63abc9 | 17 | "net/http" |
d8f41257 ILT |
18 | "os" |
19 | "path" | |
20 | "regexp" | |
21 | "runtime" | |
d8f41257 | 22 | "strconv" |
506cf9aa | 23 | "strings" |
d8f41257 | 24 | "time" |
4ccad563 | 25 | "unicode" |
9c63abc9 | 26 | "unicode/utf8" |
d8f41257 ILT |
27 | ) |
28 | ||
29 | func main() { | |
30 | flag.Parse() | |
31 | loadTestData() | |
32 | CharacterByCharacterTests() | |
33 | StandardTests() | |
34 | PerformanceTest() | |
35 | if errorCount == 0 { | |
36 | fmt.Println("PASS") | |
37 | } | |
38 | } | |
39 | ||
// file is the name of the conformance data file, both in the default
// download URL and in the current directory when -local is given.
const file = "NormalizationTest.txt"

var (
	// url is the location the test data is fetched from. By default it
	// points at the copy published for the Unicode version reported by
	// package unicode.
	url = flag.String(
		"url",
		"http://www.unicode.org/Public/"+unicode.Version+"/ucd/"+file,
		"URL of Unicode database directory",
	)

	// localFiles makes loadTestData read the data file from the current
	// directory instead of using the default value of url.
	localFiles = flag.Bool(
		"local",
		false,
		"data files have been copied to the current directory; for debugging only",
	)

	// logger writes reports to standard error, tagging each one with the
	// file name and line number of the call site.
	logger = log.New(os.Stderr, "", log.Lshortfile)
)
50 | ||
51 | // This regression test runs the test set in NormalizationTest.txt | |
4ccad563 | 52 | // (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/). |
d8f41257 ILT |
53 | // |
54 | // NormalizationTest.txt has form: | |
55 | // @Part0 # Specific cases | |
56 | // # | |
57 | // 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE | |
58 | // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW | |
59 | // | |
60 | // Each test has 5 columns (c1, c2, c3, c4, c5), where | |
61 | // (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1)) | |
62 | // | |
63 | // CONFORMANCE: | |
64 | // 1. The following invariants must be true for all conformant implementations | |
65 | // | |
66 | // NFC | |
67 | // c2 == NFC(c1) == NFC(c2) == NFC(c3) | |
68 | // c4 == NFC(c4) == NFC(c5) | |
69 | // | |
70 | // NFD | |
71 | // c3 == NFD(c1) == NFD(c2) == NFD(c3) | |
72 | // c5 == NFD(c4) == NFD(c5) | |
73 | // | |
74 | // NFKC | |
75 | // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) | |
76 | // | |
77 | // NFKD | |
78 | // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) | |
79 | // | |
80 | // 2. For every code point X assigned in this version of Unicode that is not | |
81 | // specifically listed in Part 1, the following invariants must be true | |
82 | // for all conformant implementations: | |
83 | // | |
84 | // X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) | |
85 | // | |
86 | ||
// Column indices into Test.cols. The columns are stored in the order in
// which they appear on a test line of NormalizationTest.txt.
const (
	cRaw        = iota // c1: the source string
	cNFC               // c2: NFC(c1)
	cNFD               // c3: NFD(c1)
	cNFKC              // c4: NFKC(c1)
	cNFKD              // c5: NFKD(c1)
	cMaxColumns        // number of columns on a test line
)
96 | ||
97 | // Holds data from NormalizationTest.txt | |
98 | var part []Part | |
99 | ||
100 | type Part struct { | |
101 | name string | |
102 | number int | |
103 | tests []Test | |
104 | } | |
105 | ||
106 | type Test struct { | |
107 | name string | |
108 | partnr int | |
109 | number int | |
506cf9aa | 110 | r rune // used for character by character test |
d8f41257 ILT |
111 | cols [cMaxColumns]string // Each has 5 entries, see below. |
112 | } | |
113 | ||
114 | func (t Test) Name() string { | |
115 | if t.number < 0 { | |
116 | return part[t.partnr].name | |
117 | } | |
118 | return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number) | |
119 | } | |
120 | ||
var (
	// partRe matches a section header such as "@Part0 # Specific cases".
	// Group 1 is the part digit, group 2 the description.
	partRe = regexp.MustCompile(`@Part(\d) # (.*)\n$`)

	// testRe matches a test line: five ';'-terminated columns of
	// hexadecimal code points (groups 1-5) followed by a '#' comment
	// (group 6).
	testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)\n?$`)

	// counter is the sequence number given to the next test that
	// loadTestData reads.
	counter int
)
125 | ||
126 | // Load the data form NormalizationTest.txt | |
127 | func loadTestData() { | |
128 | if *localFiles { | |
129 | pwd, _ := os.Getwd() | |
130 | *url = "file://" + path.Join(pwd, file) | |
131 | } | |
132 | t := &http.Transport{} | |
133 | t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/"))) | |
134 | c := &http.Client{Transport: t} | |
135 | resp, err := c.Get(*url) | |
136 | if err != nil { | |
137 | logger.Fatal(err) | |
138 | } | |
139 | if resp.StatusCode != 200 { | |
140 | logger.Fatal("bad GET status for "+file, resp.Status) | |
141 | } | |
142 | f := resp.Body | |
143 | defer f.Close() | |
144 | input := bufio.NewReader(f) | |
145 | for { | |
146 | line, err := input.ReadString('\n') | |
147 | if err != nil { | |
2fd401c8 | 148 | if err == io.EOF { |
d8f41257 ILT |
149 | break |
150 | } | |
151 | logger.Fatal(err) | |
152 | } | |
153 | if len(line) == 0 || line[0] == '#' { | |
154 | continue | |
155 | } | |
156 | m := partRe.FindStringSubmatch(line) | |
157 | if m != nil { | |
158 | if len(m) < 3 { | |
159 | logger.Fatal("Failed to parse Part: ", line) | |
160 | } | |
161 | i, err := strconv.Atoi(m[1]) | |
162 | if err != nil { | |
163 | logger.Fatal(err) | |
164 | } | |
165 | name := m[2] | |
166 | part = append(part, Part{name: name[:len(name)-1], number: i}) | |
167 | continue | |
168 | } | |
169 | m = testRe.FindStringSubmatch(line) | |
170 | if m == nil || len(m) < 7 { | |
171 | logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m) | |
172 | } | |
173 | test := Test{name: m[6], partnr: len(part) - 1, number: counter} | |
174 | counter++ | |
175 | for j := 1; j < len(m)-1; j++ { | |
176 | for _, split := range strings.Split(m[j], " ") { | |
d5363590 | 177 | r, err := strconv.ParseUint(split, 16, 64) |
d8f41257 ILT |
178 | if err != nil { |
179 | logger.Fatal(err) | |
180 | } | |
506cf9aa | 181 | if test.r == 0 { |
d8f41257 | 182 | // save for CharacterByCharacterTests |
9a0e3259 | 183 | test.r = rune(r) |
d8f41257 ILT |
184 | } |
185 | var buf [utf8.UTFMax]byte | |
506cf9aa | 186 | sz := utf8.EncodeRune(buf[:], rune(r)) |
d8f41257 ILT |
187 | test.cols[j-1] += string(buf[:sz]) |
188 | } | |
189 | } | |
190 | part := &part[len(part)-1] | |
191 | part.tests = append(part.tests, test) | |
192 | } | |
193 | } | |
194 | ||
var (
	// fstr holds the names used for normalization forms in failure
	// reports; it is indexed by a norm.Form value.
	fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}

	// errorCount is the number of failed checks recorded so far.
	errorCount int
)
198 | ||
199 | func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) { | |
200 | if gold != result { | |
201 | errorCount++ | |
202 | if errorCount > 20 { | |
203 | return | |
204 | } | |
506cf9aa | 205 | st, sr, sg := []rune(test), []rune(result), []rune(gold) |
d8f41257 ILT |
206 | logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s", |
207 | t.Name(), name, fstr[f], st, sr, sg, t.name) | |
208 | } | |
209 | } | |
210 | ||
211 | func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) { | |
212 | if result != want { | |
213 | errorCount++ | |
214 | if errorCount > 20 { | |
215 | return | |
216 | } | |
506cf9aa | 217 | logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want) |
d8f41257 ILT |
218 | } |
219 | } | |
220 | ||
221 | func doTest(t *Test, f norm.Form, gold, test string) { | |
222 | result := f.Bytes([]byte(test)) | |
223 | cmpResult(t, "Bytes", f, gold, test, string(result)) | |
501699af ILT |
224 | sresult := f.String(test) |
225 | cmpResult(t, "String", f, gold, test, sresult) | |
226 | buf := make([]byte, norm.MaxSegmentSize) | |
227 | acc := []byte{} | |
228 | i := norm.Iter{} | |
229 | i.SetInputString(f, test) | |
230 | for !i.Done() { | |
231 | n := i.Next(buf) | |
232 | acc = append(acc, buf[:n]...) | |
233 | } | |
234 | cmpResult(t, "Iter.Next", f, gold, test, string(acc)) | |
d8f41257 ILT |
235 | for i := range test { |
236 | out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...) | |
237 | cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out)) | |
238 | } | |
239 | cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold) | |
240 | } | |
241 | ||
242 | func doConformanceTests(t *Test, partn int) { | |
243 | for i := 0; i <= 2; i++ { | |
244 | doTest(t, norm.NFC, t.cols[1], t.cols[i]) | |
245 | doTest(t, norm.NFD, t.cols[2], t.cols[i]) | |
246 | doTest(t, norm.NFKC, t.cols[3], t.cols[i]) | |
247 | doTest(t, norm.NFKD, t.cols[4], t.cols[i]) | |
248 | } | |
249 | for i := 3; i <= 4; i++ { | |
250 | doTest(t, norm.NFC, t.cols[3], t.cols[i]) | |
251 | doTest(t, norm.NFD, t.cols[4], t.cols[i]) | |
252 | doTest(t, norm.NFKC, t.cols[3], t.cols[i]) | |
253 | doTest(t, norm.NFKD, t.cols[4], t.cols[i]) | |
254 | } | |
255 | } | |
256 | ||
257 | func CharacterByCharacterTests() { | |
258 | tests := part[1].tests | |
9a0e3259 | 259 | var last rune = 0 |
d8f41257 | 260 | for i := 0; i <= len(tests); i++ { // last one is special case |
9a0e3259 | 261 | var r rune |
d8f41257 | 262 | if i == len(tests) { |
506cf9aa | 263 | r = 0x2FA1E // Don't have to go to 0x10FFFF |
d8f41257 | 264 | } else { |
506cf9aa | 265 | r = tests[i].r |
d8f41257 | 266 | } |
506cf9aa | 267 | for last++; last < r; last++ { |
d8f41257 ILT |
268 | // Check all characters that were not explicitly listed in the test. |
269 | t := &Test{partnr: 1, number: -1} | |
270 | char := string(last) | |
271 | doTest(t, norm.NFC, char, char) | |
272 | doTest(t, norm.NFD, char, char) | |
273 | doTest(t, norm.NFKC, char, char) | |
274 | doTest(t, norm.NFKD, char, char) | |
275 | } | |
276 | if i < len(tests) { | |
277 | doConformanceTests(&tests[i], 1) | |
278 | } | |
279 | } | |
280 | } | |
281 | ||
282 | func StandardTests() { | |
283 | for _, j := range []int{0, 2, 3} { | |
284 | for _, test := range part[j].tests { | |
285 | doConformanceTests(&test, j) | |
286 | } | |
287 | } | |
288 | } | |
289 | ||
290 | // PerformanceTest verifies that normalization is O(n). If any of the | |
291 | // code does not properly check for maxCombiningChars, normalization | |
292 | // may exhibit O(n**2) behavior. | |
293 | func PerformanceTest() { | |
294 | runtime.GOMAXPROCS(2) | |
295 | success := make(chan bool, 1) | |
296 | go func() { | |
297 | buf := bytes.Repeat([]byte("\u035D"), 1024*1024) | |
298 | buf = append(buf, "\u035B"...) | |
299 | norm.NFC.Append(nil, buf...) | |
300 | success <- true | |
301 | }() | |
9a0e3259 | 302 | timeout := time.After(1 * time.Second) |
d8f41257 ILT |
303 | select { |
304 | case <-success: | |
305 | // test completed before the timeout | |
306 | case <-timeout: | |
307 | errorCount++ | |
308 | logger.Printf(`unexpectedly long time to complete PerformanceTest`) | |
309 | } | |
310 | } |