]>
Commit | Line | Data |
---|---|---|
7a938933 ILT |
1 | // Copyright 2009 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | package strings_test | |
6 | ||
7 | import ( | |
adb0401d | 8 | "bytes" |
c2047754 | 9 | "fmt" |
2fd401c8 | 10 | "io" |
4ccad563 | 11 | "math/rand" |
f72f4169 | 12 | "reflect" |
c2047754 | 13 | "runtime" |
4f4a855d | 14 | "strconv" |
7a938933 ILT |
15 | . "strings" |
16 | "testing" | |
17 | "unicode" | |
9c63abc9 | 18 | "unicode/utf8" |
f72f4169 | 19 | "unsafe" |
7a938933 ILT |
20 | ) |
21 | ||
// eq reports whether a and b contain the same strings in the same order.
// Only lengths and elements are compared, so a nil slice equals an empty one.
func eq(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, s := range a {
		if s != b[i] {
			return false
		}
	}
	return true
}
33 | ||
34 | var abcd = "abcd" | |
35 | var faces = "☺☻☹" | |
36 | var commas = "1,2,3,4" | |
37 | var dots = "1....2....3....4" | |
38 | ||
39 | type IndexTest struct { | |
40 | s string | |
41 | sep string | |
42 | out int | |
43 | } | |
44 | ||
45 | var indexTests = []IndexTest{ | |
46 | {"", "", 0}, | |
47 | {"", "a", -1}, | |
48 | {"", "foo", -1}, | |
49 | {"fo", "foo", -1}, | |
50 | {"foo", "foo", 0}, | |
51 | {"oofofoofooo", "f", 2}, | |
52 | {"oofofoofooo", "foo", 4}, | |
53 | {"barfoobarfoo", "foo", 3}, | |
54 | {"foo", "", 0}, | |
55 | {"foo", "o", 1}, | |
56 | {"abcABCabc", "A", 3}, | |
57 | // cases with one byte strings - test special case in Index() | |
58 | {"", "a", -1}, | |
59 | {"x", "a", -1}, | |
60 | {"x", "x", 0}, | |
61 | {"abc", "a", 0}, | |
62 | {"abc", "b", 1}, | |
63 | {"abc", "c", 2}, | |
64 | {"abc", "x", -1}, | |
f98dd1a3 ILT |
65 | // test special cases in Index() for short strings |
66 | {"", "ab", -1}, | |
67 | {"bc", "ab", -1}, | |
68 | {"ab", "ab", 0}, | |
69 | {"xab", "ab", 1}, | |
70 | {"xab"[:2], "ab", -1}, | |
71 | {"", "abc", -1}, | |
72 | {"xbc", "abc", -1}, | |
73 | {"abc", "abc", 0}, | |
74 | {"xabc", "abc", 1}, | |
75 | {"xabc"[:3], "abc", -1}, | |
76 | {"xabxc", "abc", -1}, | |
77 | {"", "abcd", -1}, | |
78 | {"xbcd", "abcd", -1}, | |
79 | {"abcd", "abcd", 0}, | |
80 | {"xabcd", "abcd", 1}, | |
81 | {"xyabcd"[:5], "abcd", -1}, | |
82 | {"xbcqq", "abcqq", -1}, | |
83 | {"abcqq", "abcqq", 0}, | |
84 | {"xabcqq", "abcqq", 1}, | |
85 | {"xyabcqq"[:6], "abcqq", -1}, | |
86 | {"xabxcqq", "abcqq", -1}, | |
87 | {"xabcqxq", "abcqq", -1}, | |
88 | {"", "01234567", -1}, | |
89 | {"32145678", "01234567", -1}, | |
90 | {"01234567", "01234567", 0}, | |
91 | {"x01234567", "01234567", 1}, | |
c2047754 | 92 | {"x0123456x01234567", "01234567", 9}, |
f98dd1a3 ILT |
93 | {"xx01234567"[:9], "01234567", -1}, |
94 | {"", "0123456789", -1}, | |
95 | {"3214567844", "0123456789", -1}, | |
96 | {"0123456789", "0123456789", 0}, | |
97 | {"x0123456789", "0123456789", 1}, | |
c2047754 | 98 | {"x012345678x0123456789", "0123456789", 11}, |
f98dd1a3 ILT |
99 | {"xyz0123456789"[:12], "0123456789", -1}, |
100 | {"x01234567x89", "0123456789", -1}, | |
101 | {"", "0123456789012345", -1}, | |
102 | {"3214567889012345", "0123456789012345", -1}, | |
103 | {"0123456789012345", "0123456789012345", 0}, | |
104 | {"x0123456789012345", "0123456789012345", 1}, | |
c2047754 | 105 | {"x012345678901234x0123456789012345", "0123456789012345", 17}, |
f98dd1a3 ILT |
106 | {"", "01234567890123456789", -1}, |
107 | {"32145678890123456789", "01234567890123456789", -1}, | |
108 | {"01234567890123456789", "01234567890123456789", 0}, | |
109 | {"x01234567890123456789", "01234567890123456789", 1}, | |
c2047754 | 110 | {"x0123456789012345678x01234567890123456789", "01234567890123456789", 21}, |
f98dd1a3 ILT |
111 | {"xyz01234567890123456789"[:22], "01234567890123456789", -1}, |
112 | {"", "0123456789012345678901234567890", -1}, | |
113 | {"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1}, | |
114 | {"0123456789012345678901234567890", "0123456789012345678901234567890", 0}, | |
115 | {"x0123456789012345678901234567890", "0123456789012345678901234567890", 1}, | |
c2047754 | 116 | {"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32}, |
f98dd1a3 ILT |
117 | {"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1}, |
118 | {"", "01234567890123456789012345678901", -1}, | |
119 | {"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1}, | |
120 | {"01234567890123456789012345678901", "01234567890123456789012345678901", 0}, | |
121 | {"x01234567890123456789012345678901", "01234567890123456789012345678901", 1}, | |
c2047754 | 122 | {"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33}, |
f98dd1a3 | 123 | {"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1}, |
c2047754 ILT |
124 | {"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6}, |
125 | {"", "0123456789012345678901234567890123456789", -1}, | |
126 | {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2}, | |
127 | {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1}, | |
128 | {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1}, | |
129 | {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65}, | |
1a2f01ef ILT |
130 | // test fallback to Rabin-Karp. |
131 | {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, | |
132 | {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, | |
7a938933 ILT |
133 | } |
134 | ||
135 | var lastIndexTests = []IndexTest{ | |
136 | {"", "", 0}, | |
137 | {"", "a", -1}, | |
138 | {"", "foo", -1}, | |
139 | {"fo", "foo", -1}, | |
140 | {"foo", "foo", 0}, | |
141 | {"foo", "f", 0}, | |
142 | {"oofofoofooo", "f", 7}, | |
143 | {"oofofoofooo", "foo", 7}, | |
144 | {"barfoobarfoo", "foo", 9}, | |
145 | {"foo", "", 3}, | |
146 | {"foo", "o", 2}, | |
147 | {"abcABCabc", "A", 3}, | |
148 | {"abcABCabc", "a", 6}, | |
149 | } | |
150 | ||
151 | var indexAnyTests = []IndexTest{ | |
152 | {"", "", -1}, | |
153 | {"", "a", -1}, | |
154 | {"", "abc", -1}, | |
155 | {"a", "", -1}, | |
156 | {"a", "a", 0}, | |
157 | {"aaa", "a", 0}, | |
158 | {"abc", "xyz", -1}, | |
159 | {"abc", "xcz", 2}, | |
c2047754 ILT |
160 | {"ab☺c", "x☺yz", 2}, |
161 | {"a☺b☻c☹d", "cx", len("a☺b☻")}, | |
162 | {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")}, | |
7a938933 ILT |
163 | {"aRegExp*", ".(|)*+?^$[]", 7}, |
164 | {dots + dots + dots, " ", -1}, | |
c2047754 ILT |
165 | {"012abcba210", "\xffb", 4}, |
166 | {"012\x80bcb\x80210", "\xffb", 3}, | |
7a938933 | 167 | } |
c2047754 | 168 | |
ff5f50c5 ILT |
169 | var lastIndexAnyTests = []IndexTest{ |
170 | {"", "", -1}, | |
171 | {"", "a", -1}, | |
172 | {"", "abc", -1}, | |
173 | {"a", "", -1}, | |
174 | {"a", "a", 0}, | |
175 | {"aaa", "a", 2}, | |
176 | {"abc", "xyz", -1}, | |
177 | {"abc", "ab", 1}, | |
c2047754 ILT |
178 | {"ab☺c", "x☺yz", 2}, |
179 | {"a☺b☻c☹d", "cx", len("a☺b☻")}, | |
180 | {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")}, | |
ff5f50c5 ILT |
181 | {"a.RegExp*", ".(|)*+?^$[]", 8}, |
182 | {dots + dots + dots, " ", -1}, | |
c2047754 ILT |
183 | {"012abcba210", "\xffb", 6}, |
184 | {"012\x80bcb\x80210", "\xffb", 7}, | |
ff5f50c5 | 185 | } |
7a938933 ILT |
186 | |
187 | // Execute f on each test case. funcName should be the name of f; it's used | |
188 | // in failure reports. | |
189 | func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) { | |
190 | for _, test := range testCases { | |
191 | actual := f(test.s, test.sep) | |
192 | if actual != test.out { | |
193 | t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out) | |
194 | } | |
195 | } | |
196 | } | |
197 | ||
5a8ea165 ILT |
198 | func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } |
199 | func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } | |
200 | func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } | |
201 | func TestLastIndexAny(t *testing.T) { | |
202 | runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) | |
203 | } | |
7a938933 | 204 | |
aa8901e9 ILT |
205 | func TestIndexByte(t *testing.T) { |
206 | for _, tt := range indexTests { | |
207 | if len(tt.sep) != 1 { | |
208 | continue | |
209 | } | |
210 | pos := IndexByte(tt.s, tt.sep[0]) | |
211 | if pos != tt.out { | |
212 | t.Errorf(`IndexByte(%q, %q) = %v; want %v`, tt.s, tt.sep[0], pos, tt.out) | |
213 | } | |
214 | } | |
215 | } | |
216 | ||
af146490 ILT |
217 | func TestLastIndexByte(t *testing.T) { |
218 | testCases := []IndexTest{ | |
219 | {"", "q", -1}, | |
220 | {"abcdef", "q", -1}, | |
221 | {"abcdefabcdef", "a", len("abcdef")}, // something in the middle | |
222 | {"abcdefabcdef", "f", len("abcdefabcde")}, // last byte | |
223 | {"zabcdefabcdef", "z", 0}, // first byte | |
224 | {"a☺b☻c☹d", "b", len("a☺")}, // non-ascii | |
225 | } | |
226 | for _, test := range testCases { | |
227 | actual := LastIndexByte(test.s, test.sep[0]) | |
228 | if actual != test.out { | |
229 | t.Errorf("LastIndexByte(%q,%c) = %v; want %v", test.s, test.sep[0], actual, test.out) | |
230 | } | |
231 | } | |
232 | } | |
233 | ||
22b955cc ILT |
// simpleIndex is a brute-force reference implementation of substring search.
// It returns the byte offset of the first occurrence of sep in s, or -1 if
// sep does not occur. An empty sep matches at offset 0.
func simpleIndex(s, sep string) int {
	width := len(sep)
	// Slide a window of len(sep) bytes over s from left to right.
	for start := 0; start+width <= len(s); start++ {
		if s[start:start+width] == sep {
			return start
		}
	}
	return -1
}
243 | ||
244 | func TestIndexRandom(t *testing.T) { | |
245 | const chars = "abcdefghijklmnopqrstuvwxyz0123456789" | |
246 | for times := 0; times < 10; times++ { | |
247 | for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary | |
248 | s1 := make([]byte, strLen) | |
249 | for i := range s1 { | |
250 | s1[i] = chars[rand.Intn(len(chars))] | |
251 | } | |
252 | s := string(s1) | |
253 | for i := 0; i < 50; i++ { | |
254 | begin := rand.Intn(len(s) + 1) | |
255 | end := begin + rand.Intn(len(s)+1-begin) | |
256 | sep := s[begin:end] | |
257 | if i%4 == 0 { | |
258 | pos := rand.Intn(len(sep) + 1) | |
259 | sep = sep[:pos] + "A" + sep[pos:] | |
260 | } | |
261 | want := simpleIndex(s, sep) | |
262 | res := Index(s, sep) | |
263 | if res != want { | |
264 | t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want) | |
265 | } | |
266 | } | |
267 | } | |
268 | } | |
269 | } | |
270 | ||
// TestIndexRune checks IndexRune against a table covering ASCII and
// multi-byte runes, utf8.RuneError matching, and invalid rune values. It then
// measures allocations for two lookups and expects none (the check is skipped
// for gccgo and when coverage mode is enabled).
func TestIndexRune(t *testing.T) {
	// malformed is the shared haystack for the "invalid rune" rows below.
	const malformed = "a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80"

	cases := []struct {
		s   string
		r   rune
		idx int
	}{
		{"", 'a', -1},
		{"", '☺', -1},
		{"foo", '☹', -1},
		{"foo", 'o', 1},
		{"foo☺bar", '☺', 3},
		{"foo☺☻☹bar", '☹', 9},
		{"a A x", 'A', 2},
		{"some_text=some_value", '=', 9},
		{"☺a", 'a', 3},
		{"a☻☺b", '☺', 4},

		// RuneError should match any invalid UTF-8 byte sequence.
		{"�", '�', 0},
		{"\xff", '�', 0},
		{"☻x�", '�', len("☻x")},
		{"☻x\xe2\x98", '�', len("☻x")},
		{"☻x\xe2\x98�", '�', len("☻x")},
		{"☻x\xe2\x98x", '�', len("☻x")},

		// Invalid rune values should never match.
		{malformed, -1, -1},
		{malformed, 0xD800, -1}, // Surrogate pair
		{malformed, utf8.MaxRune + 1, -1},
	}
	for _, c := range cases {
		got := IndexRune(c.s, c.r)
		if got == c.idx {
			continue
		}
		t.Errorf("IndexRune(%q, %d) = %v; want %v", c.s, c.r, got, c.idx)
	}

	text := "test世界"
	perRun := testing.AllocsPerRun(1000, func() {
		if at := IndexRune(text, 's'); at != 2 {
			t.Fatalf("'s' at %d; want 2", at)
		}
		if at := IndexRune(text, '世'); at != 4 {
			t.Fatalf("'世' at %d; want 4", at)
		}
	})
	if runtime.Compiler == "gccgo" {
		t.Skip("skipping allocations test for gccgo until escape analysis is enabled")
	}
	if perRun != 0 && testing.CoverMode() == "" {
		t.Errorf("expected no allocations, got %f", perRun)
	}
}
323 | ||
324 | const benchmarkString = "some_text=some☺value" | |
325 | ||
326 | func BenchmarkIndexRune(b *testing.B) { | |
327 | if got := IndexRune(benchmarkString, '☺'); got != 14 { | |
df4aa89a | 328 | b.Fatalf("wrong index: expected 14, got=%d", got) |
8039ca76 ILT |
329 | } |
330 | for i := 0; i < b.N; i++ { | |
331 | IndexRune(benchmarkString, '☺') | |
332 | } | |
333 | } | |
334 | ||
c2047754 ILT |
335 | var benchmarkLongString = Repeat(" ", 100) + benchmarkString |
336 | ||
337 | func BenchmarkIndexRuneLongString(b *testing.B) { | |
338 | if got := IndexRune(benchmarkLongString, '☺'); got != 114 { | |
339 | b.Fatalf("wrong index: expected 114, got=%d", got) | |
340 | } | |
341 | for i := 0; i < b.N; i++ { | |
342 | IndexRune(benchmarkLongString, '☺') | |
343 | } | |
344 | } | |
345 | ||
8039ca76 ILT |
346 | func BenchmarkIndexRuneFastPath(b *testing.B) { |
347 | if got := IndexRune(benchmarkString, 'v'); got != 17 { | |
df4aa89a | 348 | b.Fatalf("wrong index: expected 17, got=%d", got) |
8039ca76 ILT |
349 | } |
350 | for i := 0; i < b.N; i++ { | |
351 | IndexRune(benchmarkString, 'v') | |
352 | } | |
353 | } | |
354 | ||
355 | func BenchmarkIndex(b *testing.B) { | |
356 | if got := Index(benchmarkString, "v"); got != 17 { | |
df4aa89a | 357 | b.Fatalf("wrong index: expected 17, got=%d", got) |
8039ca76 ILT |
358 | } |
359 | for i := 0; i < b.N; i++ { | |
360 | Index(benchmarkString, "v") | |
361 | } | |
362 | } | |
363 | ||
f8d9fa9e ILT |
364 | func BenchmarkLastIndex(b *testing.B) { |
365 | if got := Index(benchmarkString, "v"); got != 17 { | |
366 | b.Fatalf("wrong index: expected 17, got=%d", got) | |
367 | } | |
368 | for i := 0; i < b.N; i++ { | |
369 | LastIndex(benchmarkString, "v") | |
370 | } | |
371 | } | |
372 | ||
f038dae6 ILT |
373 | func BenchmarkIndexByte(b *testing.B) { |
374 | if got := IndexByte(benchmarkString, 'v'); got != 17 { | |
375 | b.Fatalf("wrong index: expected 17, got=%d", got) | |
376 | } | |
377 | for i := 0; i < b.N; i++ { | |
378 | IndexByte(benchmarkString, 'v') | |
379 | } | |
380 | } | |
381 | ||
7a938933 ILT |
382 | type SplitTest struct { |
383 | s string | |
384 | sep string | |
385 | n int | |
386 | a []string | |
387 | } | |
388 | ||
389 | var splittests = []SplitTest{ | |
22b955cc ILT |
390 | {"", "", -1, []string{}}, |
391 | {abcd, "", 2, []string{"a", "bcd"}}, | |
392 | {abcd, "", 4, []string{"a", "b", "c", "d"}}, | |
393 | {abcd, "", -1, []string{"a", "b", "c", "d"}}, | |
394 | {faces, "", -1, []string{"☺", "☻", "☹"}}, | |
395 | {faces, "", 3, []string{"☺", "☻", "☹"}}, | |
396 | {faces, "", 17, []string{"☺", "☻", "☹"}}, | |
397 | {"☺�☹", "", -1, []string{"☺", "�", "☹"}}, | |
7a938933 ILT |
398 | {abcd, "a", 0, nil}, |
399 | {abcd, "a", -1, []string{"", "bcd"}}, | |
400 | {abcd, "z", -1, []string{"abcd"}}, | |
7a938933 ILT |
401 | {commas, ",", -1, []string{"1", "2", "3", "4"}}, |
402 | {dots, "...", -1, []string{"1", ".2", ".3", ".4"}}, | |
403 | {faces, "☹", -1, []string{"☺☻", ""}}, | |
404 | {faces, "~", -1, []string{faces}}, | |
7a938933 ILT |
405 | {"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}}, |
406 | {"1 2", " ", 3, []string{"1", "2"}}, | |
7a938933 ILT |
407 | } |
408 | ||
409 | func TestSplit(t *testing.T) { | |
410 | for _, tt := range splittests { | |
adb0401d | 411 | a := SplitN(tt.s, tt.sep, tt.n) |
7a938933 ILT |
412 | if !eq(a, tt.a) { |
413 | t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a) | |
414 | continue | |
415 | } | |
416 | if tt.n == 0 { | |
417 | continue | |
418 | } | |
419 | s := Join(a, tt.sep) | |
420 | if s != tt.s { | |
421 | t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s) | |
422 | } | |
adb0401d ILT |
423 | if tt.n < 0 { |
424 | b := Split(tt.s, tt.sep) | |
425 | if !reflect.DeepEqual(a, b) { | |
426 | t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) | |
427 | } | |
428 | } | |
7a938933 ILT |
429 | } |
430 | } | |
431 | ||
432 | var splitaftertests = []SplitTest{ | |
433 | {abcd, "a", -1, []string{"a", "bcd"}}, | |
434 | {abcd, "z", -1, []string{"abcd"}}, | |
435 | {abcd, "", -1, []string{"a", "b", "c", "d"}}, | |
436 | {commas, ",", -1, []string{"1,", "2,", "3,", "4"}}, | |
437 | {dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}}, | |
438 | {faces, "☹", -1, []string{"☺☻☹", ""}}, | |
439 | {faces, "~", -1, []string{faces}}, | |
440 | {faces, "", -1, []string{"☺", "☻", "☹"}}, | |
441 | {"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}}, | |
442 | {"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}}, | |
443 | {"1 2", " ", 3, []string{"1 ", "2"}}, | |
444 | {"123", "", 2, []string{"1", "23"}}, | |
445 | {"123", "", 17, []string{"1", "2", "3"}}, | |
446 | } | |
447 | ||
448 | func TestSplitAfter(t *testing.T) { | |
449 | for _, tt := range splitaftertests { | |
adb0401d | 450 | a := SplitAfterN(tt.s, tt.sep, tt.n) |
7a938933 ILT |
451 | if !eq(a, tt.a) { |
452 | t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a) | |
453 | continue | |
454 | } | |
455 | s := Join(a, "") | |
456 | if s != tt.s { | |
457 | t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s) | |
458 | } | |
adb0401d ILT |
459 | if tt.n < 0 { |
460 | b := SplitAfter(tt.s, tt.sep) | |
461 | if !reflect.DeepEqual(a, b) { | |
462 | t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) | |
463 | } | |
464 | } | |
7a938933 ILT |
465 | } |
466 | } | |
467 | ||
468 | type FieldsTest struct { | |
469 | s string | |
470 | a []string | |
471 | } | |
472 | ||
473 | var fieldstests = []FieldsTest{ | |
474 | {"", []string{}}, | |
475 | {" ", []string{}}, | |
476 | {" \t ", []string{}}, | |
bc998d03 | 477 | {"\u2000", []string{}}, |
7a938933 ILT |
478 | {" abc ", []string{"abc"}}, |
479 | {"1 2 3 4", []string{"1", "2", "3", "4"}}, | |
480 | {"1 2 3 4", []string{"1", "2", "3", "4"}}, | |
481 | {"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}}, | |
482 | {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}}, | |
483 | {"\u2000\u2001\u2002", []string{}}, | |
484 | {"\n™\t™\n", []string{"™", "™"}}, | |
bc998d03 ILT |
485 | {"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}}, |
486 | {"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}}, | |
487 | {"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}}, | |
7a938933 ILT |
488 | {faces, []string{faces}}, |
489 | } | |
490 | ||
491 | func TestFields(t *testing.T) { | |
492 | for _, tt := range fieldstests { | |
493 | a := Fields(tt.s) | |
494 | if !eq(a, tt.a) { | |
495 | t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a) | |
496 | continue | |
497 | } | |
498 | } | |
499 | } | |
500 | ||
d8f41257 ILT |
501 | var FieldsFuncTests = []FieldsTest{ |
502 | {"", []string{}}, | |
503 | {"XX", []string{}}, | |
504 | {"XXhiXXX", []string{"hi"}}, | |
505 | {"aXXbXXXcX", []string{"a", "b", "c"}}, | |
506 | } | |
507 | ||
7a938933 | 508 | func TestFieldsFunc(t *testing.T) { |
4ccad563 ILT |
509 | for _, tt := range fieldstests { |
510 | a := FieldsFunc(tt.s, unicode.IsSpace) | |
511 | if !eq(a, tt.a) { | |
512 | t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a) | |
513 | continue | |
514 | } | |
515 | } | |
506cf9aa | 516 | pred := func(c rune) bool { return c == 'X' } |
d8f41257 | 517 | for _, tt := range FieldsFuncTests { |
7a938933 ILT |
518 | a := FieldsFunc(tt.s, pred) |
519 | if !eq(a, tt.a) { | |
520 | t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a) | |
521 | } | |
522 | } | |
523 | } | |
524 | ||
7a938933 ILT |
525 | // Test case for any function which accepts and returns a single string. |
526 | type StringTest struct { | |
527 | in, out string | |
528 | } | |
529 | ||
530 | // Execute f on each test case. funcName should be the name of f; it's used | |
531 | // in failure reports. | |
532 | func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) { | |
533 | for _, tc := range testCases { | |
534 | actual := f(tc.in) | |
535 | if actual != tc.out { | |
536 | t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out) | |
537 | } | |
538 | } | |
539 | } | |
540 | ||
541 | var upperTests = []StringTest{ | |
542 | {"", ""}, | |
1a2f01ef | 543 | {"ONLYUPPER", "ONLYUPPER"}, |
7a938933 ILT |
544 | {"abc", "ABC"}, |
545 | {"AbC123", "ABC123"}, | |
546 | {"azAZ09_", "AZAZ09_"}, | |
1a2f01ef ILT |
547 | {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"}, |
548 | {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"}, | |
7a938933 | 549 | {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char |
c9492649 | 550 | {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune |
7a938933 ILT |
551 | } |
552 | ||
553 | var lowerTests = []StringTest{ | |
554 | {"", ""}, | |
555 | {"abc", "abc"}, | |
556 | {"AbC123", "abc123"}, | |
557 | {"azAZ09_", "azaz09_"}, | |
1a2f01ef ILT |
558 | {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"}, |
559 | {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"}, | |
7a938933 | 560 | {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char |
c9492649 | 561 | {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune |
7a938933 ILT |
562 | } |
563 | ||
564 | const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000" | |
565 | ||
566 | var trimSpaceTests = []StringTest{ | |
567 | {"", ""}, | |
568 | {"abc", "abc"}, | |
569 | {space + "abc" + space, "abc"}, | |
570 | {" ", ""}, | |
571 | {" \t\r\n \t\t\r\r\n\n ", ""}, | |
572 | {" \t\r\n x\t\t\r\r\n\n ", "x"}, | |
573 | {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"}, | |
574 | {"1 \t\r\n2", "1 \t\r\n2"}, | |
575 | {" x\x80", "x\x80"}, | |
576 | {" x\xc0", "x\xc0"}, | |
577 | {"x \xc0\xc0 ", "x \xc0\xc0"}, | |
578 | {"x \xc0", "x \xc0"}, | |
579 | {"x \xc0 ", "x \xc0"}, | |
580 | {"x \xc0\xc0 ", "x \xc0\xc0"}, | |
581 | {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"}, | |
582 | {"x ☺ ", "x ☺"}, | |
583 | } | |
584 | ||
506cf9aa ILT |
// tenRunes returns a string made of the rune ch repeated ten times.
func tenRunes(ch rune) string {
	var rs [10]rune
	for i := 0; i < len(rs); i++ {
		rs[i] = ch
	}
	return string(rs[:])
}
592 | ||
// rot13 is a user-defined, self-inverse mapping function: it rotates ASCII
// letters by 13 places within their own case and returns every other rune
// unchanged.
func rot13(r rune) rune {
	const step = 13
	switch {
	case 'a' <= r && r <= 'z':
		return 'a' + (r-'a'+step)%26
	case 'A' <= r && r <= 'Z':
		return 'A' + (r-'A'+step)%26
	}
	return r
}
604 | ||
605 | func TestMap(t *testing.T) { | |
606 | // Run a couple of awful growth/shrinkage tests | |
607 | a := tenRunes('a') | |
22b955cc | 608 | // 1. Grow. This triggers two reallocations in Map. |
506cf9aa | 609 | maxRune := func(rune) rune { return unicode.MaxRune } |
7a938933 ILT |
610 | m := Map(maxRune, a) |
611 | expect := tenRunes(unicode.MaxRune) | |
612 | if m != expect { | |
613 | t.Errorf("growing: expected %q got %q", expect, m) | |
614 | } | |
615 | ||
616 | // 2. Shrink | |
506cf9aa | 617 | minRune := func(rune) rune { return 'a' } |
7a938933 ILT |
618 | m = Map(minRune, tenRunes(unicode.MaxRune)) |
619 | expect = a | |
620 | if m != expect { | |
621 | t.Errorf("shrinking: expected %q got %q", expect, m) | |
622 | } | |
623 | ||
624 | // 3. Rot13 | |
625 | m = Map(rot13, "a to zed") | |
626 | expect = "n gb mrq" | |
627 | if m != expect { | |
628 | t.Errorf("rot13: expected %q got %q", expect, m) | |
629 | } | |
630 | ||
631 | // 4. Rot13^2 | |
632 | m = Map(rot13, Map(rot13, "a to zed")) | |
633 | expect = "a to zed" | |
634 | if m != expect { | |
635 | t.Errorf("rot13: expected %q got %q", expect, m) | |
636 | } | |
637 | ||
638 | // 5. Drop | |
506cf9aa ILT |
639 | dropNotLatin := func(r rune) rune { |
640 | if unicode.Is(unicode.Latin, r) { | |
641 | return r | |
7a938933 ILT |
642 | } |
643 | return -1 | |
644 | } | |
645 | m = Map(dropNotLatin, "Hello, 세계") | |
646 | expect = "Hello" | |
647 | if m != expect { | |
648 | t.Errorf("drop: expected %q got %q", expect, m) | |
649 | } | |
f72f4169 ILT |
650 | |
651 | // 6. Identity | |
506cf9aa ILT |
652 | identity := func(r rune) rune { |
653 | return r | |
f72f4169 ILT |
654 | } |
655 | orig := "Input string that we expect not to be copied." | |
656 | m = Map(identity, orig) | |
657 | if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data != | |
658 | (*reflect.StringHeader)(unsafe.Pointer(&m)).Data { | |
659 | t.Error("unexpected copy during identity map") | |
660 | } | |
bc998d03 ILT |
661 | |
662 | // 7. Handle invalid UTF-8 sequence | |
663 | replaceNotLatin := func(r rune) rune { | |
664 | if unicode.Is(unicode.Latin, r) { | |
665 | return r | |
666 | } | |
4f4a855d | 667 | return utf8.RuneError |
bc998d03 ILT |
668 | } |
669 | m = Map(replaceNotLatin, "Hello\255World") | |
4f4a855d | 670 | expect = "Hello\uFFFDWorld" |
bc998d03 ILT |
671 | if m != expect { |
672 | t.Errorf("replace invalid sequence: expected %q got %q", expect, m) | |
673 | } | |
c9492649 ILT |
674 | |
675 | // 8. Check utf8.RuneSelf and utf8.MaxRune encoding | |
676 | encode := func(r rune) rune { | |
677 | switch r { | |
678 | case utf8.RuneSelf: | |
679 | return unicode.MaxRune | |
680 | case unicode.MaxRune: | |
681 | return utf8.RuneSelf | |
682 | } | |
683 | return r | |
684 | } | |
685 | s := string(utf8.RuneSelf) + string(utf8.MaxRune) | |
686 | r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s | |
687 | m = Map(encode, s) | |
688 | if m != r { | |
689 | t.Errorf("encoding not handled correctly: expected %q got %q", r, m) | |
690 | } | |
691 | m = Map(encode, r) | |
692 | if m != s { | |
693 | t.Errorf("encoding not handled correctly: expected %q got %q", s, m) | |
694 | } | |
4f4a855d ILT |
695 | |
696 | // 9. Check mapping occurs in the front, middle and back | |
697 | trimSpaces := func(r rune) rune { | |
698 | if unicode.IsSpace(r) { | |
699 | return -1 | |
700 | } | |
701 | return r | |
702 | } | |
703 | m = Map(trimSpaces, " abc 123 ") | |
704 | expect = "abc123" | |
705 | if m != expect { | |
706 | t.Errorf("trimSpaces: expected %q got %q", expect, m) | |
707 | } | |
7a938933 ILT |
708 | } |
709 | ||
710 | func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } | |
711 | ||
712 | func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) } | |
713 | ||
aa8901e9 ILT |
714 | var toValidUTF8Tests = []struct { |
715 | in string | |
716 | repl string | |
717 | out string | |
718 | }{ | |
719 | {"", "\uFFFD", ""}, | |
720 | {"abc", "\uFFFD", "abc"}, | |
721 | {"\uFDDD", "\uFFFD", "\uFDDD"}, | |
722 | {"a\xffb", "\uFFFD", "a\uFFFDb"}, | |
723 | {"a\xffb\uFFFD", "X", "aXb\uFFFD"}, | |
724 | {"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"}, | |
725 | {"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"}, | |
726 | {"\xC0\xAF", "\uFFFD", "\uFFFD"}, | |
727 | {"\xE0\x80\xAF", "\uFFFD", "\uFFFD"}, | |
728 | {"\xed\xa0\x80", "abc", "abc"}, | |
729 | {"\xed\xbf\xbf", "\uFFFD", "\uFFFD"}, | |
730 | {"\xF0\x80\x80\xaf", "☺", "☺"}, | |
731 | {"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"}, | |
732 | {"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"}, | |
733 | } | |
734 | ||
735 | func TestToValidUTF8(t *testing.T) { | |
736 | for _, tc := range toValidUTF8Tests { | |
737 | got := ToValidUTF8(tc.in, tc.repl) | |
738 | if got != tc.out { | |
739 | t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", tc.in, tc.repl, got, tc.out) | |
740 | } | |
741 | } | |
742 | } | |
743 | ||
1a2f01ef ILT |
744 | func BenchmarkToUpper(b *testing.B) { |
745 | for _, tc := range upperTests { | |
746 | b.Run(tc.in, func(b *testing.B) { | |
747 | for i := 0; i < b.N; i++ { | |
748 | actual := ToUpper(tc.in) | |
749 | if actual != tc.out { | |
750 | b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out) | |
751 | } | |
752 | } | |
753 | }) | |
754 | } | |
755 | } | |
756 | ||
757 | func BenchmarkToLower(b *testing.B) { | |
758 | for _, tc := range lowerTests { | |
759 | b.Run(tc.in, func(b *testing.B) { | |
760 | for i := 0; i < b.N; i++ { | |
761 | actual := ToLower(tc.in) | |
762 | if actual != tc.out { | |
763 | b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out) | |
764 | } | |
765 | } | |
766 | }) | |
767 | } | |
768 | } | |
769 | ||
// BenchmarkMapNoChanges times Map with a mapping function that returns every
// rune unchanged.
func BenchmarkMapNoChanges(b *testing.B) {
	same := func(r rune) rune { return r }
	for n := b.N; n > 0; n-- {
		Map(same, "Some string that won't be modified.")
	}
}
778 | ||
7a938933 ILT |
// TestSpecialCase checks ToUpperSpecial and ToLowerSpecial with
// unicode.TurkishCase: each alphabet must map to itself in its own case and
// to the other alphabet in the opposite case.
func TestSpecialCase(t *testing.T) {
	const (
		lower = "abcçdefgğhıijklmnoöprsştuüvyz"
		upper = "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
	)
	if got := ToUpperSpecial(unicode.TurkishCase, upper); got != upper {
		t.Errorf("Upper(upper) is %s not %s", got, upper)
	}
	if got := ToUpperSpecial(unicode.TurkishCase, lower); got != upper {
		t.Errorf("Upper(lower) is %s not %s", got, upper)
	}
	if got := ToLowerSpecial(unicode.TurkishCase, lower); got != lower {
		t.Errorf("Lower(lower) is %s not %s", got, lower)
	}
	if got := ToLowerSpecial(unicode.TurkishCase, upper); got != lower {
		t.Errorf("Lower(upper) is %s not %s", got, lower)
	}
}
799 | ||
800 | func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) } | |
801 | ||
// trimTests lists cases for the two-argument trimming functions. f names the
// function to call, in and arg are passed to it, and out is the expected
// result.
var trimTests = []struct {
	f            string
	in, arg, out string
}{
	{"Trim", "abba", "a", "bb"},
	{"Trim", "abba", "ab", ""},
	{"TrimLeft", "abba", "ab", ""},
	{"TrimRight", "abba", "ab", ""},
	{"TrimLeft", "abba", "a", "bba"},
	{"TrimRight", "abba", "a", "abb"},
	{"Trim", "<tag>", "<>", "tag"},
	{"Trim", "* listitem", " *", "listitem"},
	{"Trim", `"quote"`, `"`, "quote"},
	{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
	{"Trim", "\x80test\xff", "\xff", "test"},
	{"Trim", " Ġ ", " ", "Ġ"},
	{"Trim", " Ġİ0", "0 ", "Ġİ"},
	//empty string tests
	{"Trim", "abba", "", "abba"},
	{"Trim", "", "123", ""},
	{"Trim", "", "", ""},
	{"TrimLeft", "abba", "", "abba"},
	{"TrimLeft", "", "123", ""},
	{"TrimLeft", "", "", ""},
	{"TrimRight", "abba", "", "abba"},
	{"TrimRight", "", "123", ""},
	{"TrimRight", "", "", ""},
	{"TrimRight", "☺\xc0", "☺", "☺\xc0"},
	{"TrimPrefix", "aabb", "a", "abb"},
	{"TrimPrefix", "aabb", "b", "aabb"},
	{"TrimSuffix", "aabb", "a", "aabb"},
	{"TrimSuffix", "aabb", "b", "aab"},
}

// TestTrim runs every trimTests case through the function named by its f
// field and compares the result with the expected output.
func TestTrim(t *testing.T) {
	trimmers := map[string]func(string, string) string{
		"Trim":       Trim,
		"TrimLeft":   TrimLeft,
		"TrimRight":  TrimRight,
		"TrimPrefix": TrimPrefix,
		"TrimSuffix": TrimSuffix,
	}
	for _, tc := range trimTests {
		name := tc.f
		f, ok := trimmers[name]
		if !ok {
			// Skip the case: calling the nil function value would panic and
			// abort every remaining case.
			t.Errorf("Undefined trim function %s", name)
			continue
		}
		actual := f(tc.in, tc.arg)
		if actual != tc.out {
			t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
		}
	}
}
860 | ||
af146490 ILT |
861 | func BenchmarkTrim(b *testing.B) { |
862 | b.ReportAllocs() | |
863 | ||
864 | for i := 0; i < b.N; i++ { | |
865 | for _, tc := range trimTests { | |
866 | name := tc.f | |
867 | var f func(string, string) string | |
868 | switch name { | |
869 | case "Trim": | |
870 | f = Trim | |
871 | case "TrimLeft": | |
872 | f = TrimLeft | |
873 | case "TrimRight": | |
874 | f = TrimRight | |
875 | case "TrimPrefix": | |
876 | f = TrimPrefix | |
877 | case "TrimSuffix": | |
878 | f = TrimSuffix | |
879 | default: | |
880 | b.Errorf("Undefined trim function %s", name) | |
881 | } | |
882 | actual := f(tc.in, tc.arg) | |
883 | if actual != tc.out { | |
884 | b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out) | |
885 | } | |
886 | } | |
887 | } | |
888 | } | |
889 | ||
aa8901e9 ILT |
// BenchmarkToValidUTF8 times ToValidUTF8 on an already valid string and on
// ASCII and non-ASCII strings that each contain one invalid byte, one
// sub-benchmark per input.
func BenchmarkToValidUTF8(b *testing.B) {
	replacement := "\uFFFD"
	cases := []struct {
		name  string
		input string
	}{
		{name: "Valid", input: "typical"},
		{name: "InvalidASCII", input: "foo\xffbar"},
		{name: "InvalidNonASCII", input: "日本語\xff日本語"},
	}
	b.ResetTimer()
	for _, tc := range cases {
		b.Run(tc.name, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				ToValidUTF8(tc.input, replacement)
			}
		})
	}
}
909 | ||
// predicate pairs a rune classifier with the name printed for it in test
// failure messages.
type predicate struct {
	f    func(rune) bool
	name string
}

var (
	isSpace = predicate{f: unicode.IsSpace, name: "IsSpace"}
	isDigit = predicate{f: unicode.IsDigit, name: "IsDigit"}
	isUpper = predicate{f: unicode.IsUpper, name: "IsUpper"}

	// isValidRune accepts every rune except utf8.RuneError.
	isValidRune = predicate{
		f:    func(r rune) bool { return r != utf8.RuneError },
		name: "IsValidRune",
	}
)

// not returns a predicate that accepts exactly the runes p rejects; its name
// is p's name prefixed with "not ".
func not(p predicate) predicate {
	negated := func(r rune) bool { return !p.f(r) }
	return predicate{f: negated, name: "not " + p.name}
}
933 | ||
d8f41257 | 934 | var trimFuncTests = []struct { |
aa8901e9 ILT |
935 | f predicate |
936 | in string | |
937 | trimOut string | |
938 | leftOut string | |
939 | rightOut string | |
d8f41257 | 940 | }{ |
aa8901e9 ILT |
941 | {isSpace, space + " hello " + space, |
942 | "hello", | |
943 | "hello " + space, | |
944 | space + " hello"}, | |
945 | {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", | |
946 | "hello", | |
947 | "hello34\u0e50\u0e51", | |
948 | "\u0e50\u0e5212hello"}, | |
949 | {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", | |
950 | "hello", | |
951 | "helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", | |
952 | "\u2C6F\u2C6F\u2C6F\u2C6FABCDhello"}, | |
953 | {not(isSpace), "hello" + space + "hello", | |
954 | space, | |
955 | space + "hello", | |
956 | "hello" + space}, | |
957 | {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", | |
958 | "\u0e50\u0e521234\u0e50\u0e51", | |
959 | "\u0e50\u0e521234\u0e50\u0e51helo", | |
960 | "hello\u0e50\u0e521234\u0e50\u0e51"}, | |
961 | {isValidRune, "ab\xc0a\xc0cd", | |
962 | "\xc0a\xc0", | |
963 | "\xc0a\xc0cd", | |
964 | "ab\xc0a\xc0"}, | |
965 | {not(isValidRune), "\xc0a\xc0", | |
966 | "a", | |
967 | "a\xc0", | |
968 | "\xc0a"}, | |
969 | {isSpace, "", | |
970 | "", | |
971 | "", | |
972 | ""}, | |
973 | {isSpace, " ", | |
974 | "", | |
975 | "", | |
976 | ""}, | |
7a938933 ILT |
977 | } |
978 | ||
979 | func TestTrimFunc(t *testing.T) { | |
980 | for _, tc := range trimFuncTests { | |
aa8901e9 ILT |
981 | trimmers := []struct { |
982 | name string | |
983 | trim func(s string, f func(r rune) bool) string | |
984 | out string | |
985 | }{ | |
986 | {"TrimFunc", TrimFunc, tc.trimOut}, | |
987 | {"TrimLeftFunc", TrimLeftFunc, tc.leftOut}, | |
988 | {"TrimRightFunc", TrimRightFunc, tc.rightOut}, | |
989 | } | |
990 | for _, trimmer := range trimmers { | |
991 | actual := trimmer.trim(tc.in, tc.f.f) | |
992 | if actual != trimmer.out { | |
993 | t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out) | |
994 | } | |
7a938933 ILT |
995 | } |
996 | } | |
997 | } | |
998 | ||
d8f41257 | 999 | var indexFuncTests = []struct { |
7a938933 ILT |
1000 | in string |
1001 | f predicate | |
1002 | first, last int | |
d8f41257 | 1003 | }{ |
7a938933 ILT |
1004 | {"", isValidRune, -1, -1}, |
1005 | {"abc", isDigit, -1, -1}, | |
1006 | {"0123", isDigit, 0, 3}, | |
1007 | {"a1b", isDigit, 1, 1}, | |
1008 | {space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes | |
1009 | {"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18}, | |
1010 | {"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34}, | |
1011 | {"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12}, | |
1012 | ||
1013 | // tests of invalid UTF-8 | |
1014 | {"\x801", isDigit, 1, 1}, | |
1015 | {"\x80abc", isDigit, -1, -1}, | |
1016 | {"\xc0a\xc0", isValidRune, 1, 1}, | |
1017 | {"\xc0a\xc0", not(isValidRune), 0, 2}, | |
1018 | {"\xc0☺\xc0", not(isValidRune), 0, 4}, | |
1019 | {"\xc0☺\xc0\xc0", not(isValidRune), 0, 5}, | |
1020 | {"ab\xc0a\xc0cd", not(isValidRune), 2, 4}, | |
1021 | {"a\xe0\x80cd", not(isValidRune), 1, 2}, | |
1022 | {"\x80\x80\x80\x80", not(isValidRune), 0, 3}, | |
1023 | } | |
1024 | ||
1025 | func TestIndexFunc(t *testing.T) { | |
1026 | for _, tc := range indexFuncTests { | |
1027 | first := IndexFunc(tc.in, tc.f.f) | |
1028 | if first != tc.first { | |
1029 | t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first) | |
1030 | } | |
1031 | last := LastIndexFunc(tc.in, tc.f.f) | |
1032 | if last != tc.last { | |
1033 | t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last) | |
1034 | } | |
1035 | } | |
1036 | } | |
1037 | ||
// equal reports whether s1 and s2 are identical. When they differ it walks
// the two strings one UTF-8 sequence at a time over their common length and
// reports every position whose runes differ through t, labelling each message
// with m, then returns false.
func equal(m string, s1, s2 string, t *testing.T) bool {
	if s1 == s2 {
		return true
	}
	left, right := Split(s1, ""), Split(s2, "")
	limit := len(left)
	if len(right) < limit {
		limit = len(right)
	}
	for i := 0; i < limit; i++ {
		r1, _ := utf8.DecodeRuneInString(left[i])
		r2, _ := utf8.DecodeRuneInString(right[i])
		if r1 != r2 {
			t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
		}
	}
	return false
}
1056 | ||
1057 | func TestCaseConsistency(t *testing.T) { | |
1058 | // Make a string of all the runes. | |
9a0e3259 | 1059 | numRunes := int(unicode.MaxRune + 1) |
f72f4169 ILT |
1060 | if testing.Short() { |
1061 | numRunes = 1000 | |
1062 | } | |
506cf9aa | 1063 | a := make([]rune, numRunes) |
7a938933 | 1064 | for i := range a { |
506cf9aa | 1065 | a[i] = rune(i) |
7a938933 ILT |
1066 | } |
1067 | s := string(a) | |
1068 | // convert the cases. | |
1069 | upper := ToUpper(s) | |
1070 | lower := ToLower(s) | |
1071 | ||
1072 | // Consistency checks | |
f72f4169 | 1073 | if n := utf8.RuneCountInString(upper); n != numRunes { |
7a938933 ILT |
1074 | t.Error("rune count wrong in upper:", n) |
1075 | } | |
f72f4169 | 1076 | if n := utf8.RuneCountInString(lower); n != numRunes { |
7a938933 ILT |
1077 | t.Error("rune count wrong in lower:", n) |
1078 | } | |
1079 | if !equal("ToUpper(upper)", ToUpper(upper), upper, t) { | |
1080 | t.Error("ToUpper(upper) consistency fail") | |
1081 | } | |
1082 | if !equal("ToLower(lower)", ToLower(lower), lower, t) { | |
1083 | t.Error("ToLower(lower) consistency fail") | |
1084 | } | |
1085 | /* | |
1086 | These fail because of non-one-to-oneness of the data, such as multiple | |
1087 | upper case 'I' mapping to 'i'. We comment them out but keep them for | |
1088 | interest. | |
1089 | For instance: CAPITAL LETTER I WITH DOT ABOVE: | |
1090 | unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130' | |
1091 | ||
1092 | if !equal("ToUpper(lower)", ToUpper(lower), upper, t) { | |
1093 | t.Error("ToUpper(lower) consistency fail"); | |
1094 | } | |
1095 | if !equal("ToLower(upper)", ToLower(upper), lower, t) { | |
1096 | t.Error("ToLower(upper) consistency fail"); | |
1097 | } | |
1098 | */ | |
1099 | } | |
1100 | ||
d8f41257 | 1101 | var RepeatTests = []struct { |
7a938933 ILT |
1102 | in, out string |
1103 | count int | |
d8f41257 | 1104 | }{ |
7a938933 ILT |
1105 | {"", "", 0}, |
1106 | {"", "", 1}, | |
1107 | {"", "", 2}, | |
1108 | {"-", "", 0}, | |
1109 | {"-", "-", 1}, | |
1110 | {"-", "----------", 10}, | |
1111 | {"abc ", "abc abc abc ", 3}, | |
1112 | } | |
1113 | ||
1114 | func TestRepeat(t *testing.T) { | |
1115 | for _, tt := range RepeatTests { | |
1116 | a := Repeat(tt.in, tt.count) | |
1117 | if !equal("Repeat(s)", a, tt.out, t) { | |
1118 | t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out) | |
1119 | continue | |
1120 | } | |
1121 | } | |
1122 | } | |
1123 | ||
c2047754 ILT |
// repeat calls Repeat(s, count) and converts a panic into an error: a
// recovered error value is returned as is, any other value is formatted into
// a new error. It returns nil when Repeat does not panic.
func repeat(s string, count int) (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		if e, ok := r.(error); ok {
			err = e
			return
		}
		err = fmt.Errorf("%s", r)
	}()

	Repeat(s, count)

	return nil
}
1140 | ||
1141 | // See Issue golang.org/issue/16237 | |
1142 | func TestRepeatCatchesOverflow(t *testing.T) { | |
1143 | tests := [...]struct { | |
1144 | s string | |
1145 | count int | |
1146 | errStr string | |
1147 | }{ | |
1148 | 0: {"--", -2147483647, "negative"}, | |
1149 | 1: {"", int(^uint(0) >> 1), ""}, | |
1150 | 2: {"-", 10, ""}, | |
1151 | 3: {"gopher", 0, ""}, | |
1152 | 4: {"-", -1, "negative"}, | |
1153 | 5: {"--", -102, "negative"}, | |
1154 | 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"}, | |
1155 | } | |
1156 | ||
1157 | for i, tt := range tests { | |
1158 | err := repeat(tt.s, tt.count) | |
1159 | if tt.errStr == "" { | |
1160 | if err != nil { | |
1161 | t.Errorf("#%d panicked %v", i, err) | |
1162 | } | |
1163 | continue | |
1164 | } | |
1165 | ||
1166 | if err == nil || !Contains(err.Error(), tt.errStr) { | |
1167 | t.Errorf("#%d expected %q got %q", i, tt.errStr, err) | |
1168 | } | |
1169 | } | |
1170 | } | |
1171 | ||
// runesEqual reports whether a and b have the same length and hold the same
// runes in the same order.
func runesEqual(a, b []rune) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
1183 | ||
d8f41257 | 1184 | var RunesTests = []struct { |
7a938933 | 1185 | in string |
506cf9aa | 1186 | out []rune |
7a938933 | 1187 | lossy bool |
d8f41257 | 1188 | }{ |
506cf9aa ILT |
1189 | {"", []rune{}, false}, |
1190 | {" ", []rune{32}, false}, | |
1191 | {"ABC", []rune{65, 66, 67}, false}, | |
1192 | {"abc", []rune{97, 98, 99}, false}, | |
1193 | {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false}, | |
1194 | {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true}, | |
1195 | {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true}, | |
7a938933 ILT |
1196 | } |
1197 | ||
1198 | func TestRunes(t *testing.T) { | |
1199 | for _, tt := range RunesTests { | |
506cf9aa | 1200 | a := []rune(tt.in) |
7a938933 | 1201 | if !runesEqual(a, tt.out) { |
506cf9aa | 1202 | t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out) |
7a938933 ILT |
1203 | continue |
1204 | } | |
1205 | if !tt.lossy { | |
1206 | // can only test reassembly if we didn't lose information | |
1207 | s := string(a) | |
1208 | if s != tt.in { | |
506cf9aa | 1209 | t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in) |
7a938933 ILT |
1210 | } |
1211 | } | |
1212 | } | |
1213 | } | |
1214 | ||
adb0401d ILT |
1215 | func TestReadByte(t *testing.T) { |
1216 | testStrings := []string{"", abcd, faces, commas} | |
1217 | for _, s := range testStrings { | |
1218 | reader := NewReader(s) | |
1219 | if e := reader.UnreadByte(); e == nil { | |
1220 | t.Errorf("Unreading %q at beginning: expected error", s) | |
1221 | } | |
1222 | var res bytes.Buffer | |
1223 | for { | |
1224 | b, e := reader.ReadByte() | |
2fd401c8 | 1225 | if e == io.EOF { |
adb0401d ILT |
1226 | break |
1227 | } | |
1228 | if e != nil { | |
1229 | t.Errorf("Reading %q: %s", s, e) | |
1230 | break | |
1231 | } | |
1232 | res.WriteByte(b) | |
1233 | // unread and read again | |
1234 | e = reader.UnreadByte() | |
1235 | if e != nil { | |
1236 | t.Errorf("Unreading %q: %s", s, e) | |
1237 | break | |
1238 | } | |
1239 | b1, e := reader.ReadByte() | |
1240 | if e != nil { | |
1241 | t.Errorf("Reading %q after unreading: %s", s, e) | |
1242 | break | |
1243 | } | |
1244 | if b1 != b { | |
1245 | t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1) | |
1246 | break | |
1247 | } | |
1248 | } | |
1249 | if res.String() != s { | |
1250 | t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String()) | |
1251 | } | |
1252 | } | |
1253 | } | |
1254 | ||
7a938933 ILT |
1255 | func TestReadRune(t *testing.T) { |
1256 | testStrings := []string{"", abcd, faces, commas} | |
1257 | for _, s := range testStrings { | |
1258 | reader := NewReader(s) | |
adb0401d ILT |
1259 | if e := reader.UnreadRune(); e == nil { |
1260 | t.Errorf("Unreading %q at beginning: expected error", s) | |
1261 | } | |
7a938933 ILT |
1262 | res := "" |
1263 | for { | |
adb0401d | 1264 | r, z, e := reader.ReadRune() |
2fd401c8 | 1265 | if e == io.EOF { |
7a938933 ILT |
1266 | break |
1267 | } | |
1268 | if e != nil { | |
1269 | t.Errorf("Reading %q: %s", s, e) | |
1270 | break | |
1271 | } | |
1272 | res += string(r) | |
adb0401d ILT |
1273 | // unread and read again |
1274 | e = reader.UnreadRune() | |
1275 | if e != nil { | |
1276 | t.Errorf("Unreading %q: %s", s, e) | |
1277 | break | |
1278 | } | |
1279 | r1, z1, e := reader.ReadRune() | |
1280 | if e != nil { | |
1281 | t.Errorf("Reading %q after unreading: %s", s, e) | |
1282 | break | |
1283 | } | |
1284 | if r1 != r { | |
1285 | t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1) | |
1286 | break | |
1287 | } | |
1288 | if z1 != z { | |
1289 | t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1) | |
1290 | break | |
1291 | } | |
7a938933 ILT |
1292 | } |
1293 | if res != s { | |
1294 | t.Errorf("Reader(%q).ReadRune() produced %q", s, res) | |
1295 | } | |
1296 | } | |
1297 | } | |
1298 | ||
00d86ac9 ILT |
// UnreadRuneErrorTests lists Reader operations after which a following
// UnreadRune call is expected to return an error.
var UnreadRuneErrorTests = []struct {
	name string
	f    func(*Reader)
}{
	{"Read", func(r *Reader) { r.Read([]byte{0}) }},
	{"ReadByte", func(r *Reader) { r.ReadByte() }},
	{"UnreadRune", func(r *Reader) { r.UnreadRune() }},
	{"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }},
	{"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
}

// TestUnreadRuneError reads one rune, performs each listed operation on a
// fresh Reader, and verifies that UnreadRune then fails.
func TestUnreadRuneError(t *testing.T) {
	for _, tt := range UnreadRuneErrorTests {
		rd := NewReader("0123456789")
		_, _, err := rd.ReadRune()
		if err != nil {
			// should not happen
			t.Fatal(err)
		}
		tt.f(rd)
		if rd.UnreadRune() == nil {
			t.Errorf("Unreading after %s: expected error", tt.name)
		}
	}
}
1324 | ||
// ReplaceTests lists cases for Replace: replacing up to n occurrences of old
// with new in in must give out. Cases with n == -1 are also run through
// ReplaceAll.
var ReplaceTests = []struct {
	in       string
	old, new string
	n        int
	out      string
}{
	{"hello", "l", "L", 0, "hello"},
	{"hello", "l", "L", -1, "heLLo"},
	{"hello", "x", "X", -1, "hello"},
	{"", "x", "X", -1, ""},
	{"radar", "r", "<r>", -1, "<r>ada<r>"},
	{"", "", "<>", -1, "<>"},
	{"banana", "a", "<>", -1, "b<>n<>n<>"},
	{"banana", "a", "<>", 1, "b<>nana"},
	{"banana", "a", "<>", 1000, "b<>n<>n<>"},
	{"banana", "an", "<>", -1, "b<><>a"},
	{"banana", "ana", "<>", -1, "b<>na"},
	{"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
	{"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
	{"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
	{"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
	{"banana", "", "<>", 1, "<>banana"},
	{"banana", "a", "a", -1, "banana"},
	{"banana", "a", "a", 1, "banana"},
	{"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
}

// TestReplace checks Replace against ReplaceTests and ReplaceAll against the
// unlimited (n == -1) cases.
func TestReplace(t *testing.T) {
	for _, tt := range ReplaceTests {
		if got := Replace(tt.in, tt.old, tt.new, tt.n); got != tt.out {
			t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, got, tt.out)
		}
		if tt.n != -1 {
			continue
		}
		if got := ReplaceAll(tt.in, tt.old, tt.new); got != tt.out {
			t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, got, tt.out)
		}
	}
}
1365 | ||
// TitleTests lists inputs for Title with their expected results.
var TitleTests = []struct {
	in, out string
}{
	{"", ""},
	{"a", "A"},
	{" aaa aaa aaa ", " Aaa Aaa Aaa "},
	{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
	{"123a456", "123a456"},
	{"double-blind", "Double-Blind"},
	{"ÿøû", "Ÿøû"},
	{"with_underscore", "With_underscore"},
	{"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
}

// TestTitle checks Title against TitleTests.
func TestTitle(t *testing.T) {
	for _, tt := range TitleTests {
		got := Title(tt.in)
		if got == tt.out {
			continue
		}
		t.Errorf("Title(%q) = %q, want %q", tt.in, got, tt.out)
	}
}
1387 | ||
// ContainsTests lists cases for Contains: expected is whether str contains
// substr. Most cases place needles of various lengths at the start, middle
// and end of the haystack, or just outside a sliced haystack.
var ContainsTests = []struct {
	str, substr string
	expected    bool
}{
	{"abc", "bc", true},
	{"abc", "bcd", false},
	{"abc", "", true},
	{"", "a", false},

	// cases to cover code in runtime/asm_amd64.s:indexShortStr
	// 2-byte needle
	{"xxxxxx", "01", false},
	{"01xxxx", "01", true},
	{"xx01xx", "01", true},
	{"xxxx01", "01", true},
	{"01xxxxx"[1:], "01", false},
	{"xxxxx01"[:6], "01", false},
	// 3-byte needle
	{"xxxxxxx", "012", false},
	{"012xxxx", "012", true},
	{"xx012xx", "012", true},
	{"xxxx012", "012", true},
	{"012xxxxx"[1:], "012", false},
	{"xxxxx012"[:7], "012", false},
	// 4-byte needle
	{"xxxxxxxx", "0123", false},
	{"0123xxxx", "0123", true},
	{"xx0123xx", "0123", true},
	{"xxxx0123", "0123", true},
	{"0123xxxxx"[1:], "0123", false},
	{"xxxxx0123"[:8], "0123", false},
	// 5-7-byte needle
	{"xxxxxxxxx", "01234", false},
	{"01234xxxx", "01234", true},
	{"xx01234xx", "01234", true},
	{"xxxx01234", "01234", true},
	{"01234xxxxx"[1:], "01234", false},
	{"xxxxx01234"[:9], "01234", false},
	// 8-byte needle
	{"xxxxxxxxxxxx", "01234567", false},
	{"01234567xxxx", "01234567", true},
	{"xx01234567xx", "01234567", true},
	{"xxxx01234567", "01234567", true},
	{"01234567xxxxx"[1:], "01234567", false},
	{"xxxxx01234567"[:12], "01234567", false},
	// 9-15-byte needle
	{"xxxxxxxxxxxxx", "012345678", false},
	{"012345678xxxx", "012345678", true},
	{"xx012345678xx", "012345678", true},
	{"xxxx012345678", "012345678", true},
	{"012345678xxxxx"[1:], "012345678", false},
	{"xxxxx012345678"[:13], "012345678", false},
	// 16-byte needle
	{"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false},
	{"0123456789ABCDEFxxxx", "0123456789ABCDEF", true},
	{"xx0123456789ABCDEFxx", "0123456789ABCDEF", true},
	{"xxxx0123456789ABCDEF", "0123456789ABCDEF", true},
	{"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false},
	{"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false},
	// 17-31-byte needle
	{"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false},
	{"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true},
	{"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true},
	{"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true},
	{"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false},
	{"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false},

	// partial match cases
	{"xx01x", "012", false},                             // 3
	{"xx0123x", "01234", false},                         // 5-7
	{"xx01234567x", "012345678", false},                 // 9-15
	{"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679
}

// TestContains checks Contains against ContainsTests.
func TestContains(t *testing.T) {
	for _, ct := range ContainsTests {
		got := Contains(ct.str, ct.substr)
		if got != ct.expected {
			t.Errorf("Contains(%s, %s) = %v, want %v",
				ct.str, ct.substr, got, ct.expected)
		}
	}
}
d8f41257 | 1470 | |
7b1c3dd9 ILT |
1471 | var ContainsAnyTests = []struct { |
1472 | str, substr string | |
1473 | expected bool | |
1474 | }{ | |
1475 | {"", "", false}, | |
1476 | {"", "a", false}, | |
1477 | {"", "abc", false}, | |
1478 | {"a", "", false}, | |
1479 | {"a", "a", true}, | |
1480 | {"aaa", "a", true}, | |
1481 | {"abc", "xyz", false}, | |
1482 | {"abc", "xcz", true}, | |
1483 | {"a☺b☻c☹d", "uvw☻xyz", true}, | |
1484 | {"aRegExp*", ".(|)*+?^$[]", true}, | |
1485 | {dots + dots + dots, " ", false}, | |
1486 | } | |
1487 | ||
1488 | func TestContainsAny(t *testing.T) { | |
1489 | for _, ct := range ContainsAnyTests { | |
1490 | if ContainsAny(ct.str, ct.substr) != ct.expected { | |
1491 | t.Errorf("ContainsAny(%s, %s) = %v, want %v", | |
1492 | ct.str, ct.substr, !ct.expected, ct.expected) | |
1493 | } | |
1494 | } | |
1495 | } | |
1496 | ||
// ContainsRuneTests lists cases for ContainsRune: expected is whether str
// contains the rune r.
var ContainsRuneTests = []struct {
	str      string
	r        rune
	expected bool
}{
	{"", 'a', false},
	{"a", 'a', true},
	{"aaa", 'a', true},
	{"abc", 'y', false},
	{"abc", 'c', true},
	{"a☺b☻c☹d", 'x', false},
	{"a☺b☻c☹d", '☻', true},
	{"aRegExp*", '*', true},
}

// TestContainsRune checks ContainsRune against ContainsRuneTests.
func TestContainsRune(t *testing.T) {
	for _, ct := range ContainsRuneTests {
		got := ContainsRune(ct.str, ct.r)
		if got != ct.expected {
			t.Errorf("ContainsRune(%q, %q) = %v, want %v",
				ct.str, ct.r, got, ct.expected)
		}
	}
}
1520 | ||
d8f41257 ILT |
// EqualFoldTests lists string pairs for EqualFold; out is the expected
// result, which must hold for both argument orders.
var EqualFoldTests = []struct {
	s, t string
	out  bool
}{
	{"abc", "abc", true},
	{"ABcd", "ABcd", true},
	{"123abc", "123ABC", true},
	{"αβδ", "ΑΒΔ", true},
	{"abc", "xyz", false},
	{"abc", "XYZ", false},
	{"abcdefghijk", "abcdefghijX", false},
	{"abcdefghijk", "abcdefghij\u212A", true},
	{"abcdefghijK", "abcdefghij\u212A", true},
	{"abcdefghijkz", "abcdefghij\u212Ay", false},
	{"abcdefghijKz", "abcdefghij\u212Ay", false},
	{"1", "2", false},
	{"utf-8", "US-ASCII", false},
}

// TestEqualFold checks EqualFold against EqualFoldTests with the arguments in
// the listed order and then swapped.
func TestEqualFold(t *testing.T) {
	for _, tt := range EqualFoldTests {
		for _, pair := range [][2]string{{tt.s, tt.t}, {tt.t, tt.s}} {
			if out := EqualFold(pair[0], pair[1]); out != tt.out {
				t.Errorf("EqualFold(%#q, %#q) = %v, want %v", pair[0], pair[1], out, tt.out)
			}
		}
	}
}
4ccad563 | 1550 | |
dd931d9b ILT |
1551 | func BenchmarkEqualFold(b *testing.B) { |
1552 | for i := 0; i < b.N; i++ { | |
1553 | for _, tt := range EqualFoldTests { | |
1554 | if out := EqualFold(tt.s, tt.t); out != tt.out { | |
1555 | b.Fatal("wrong result") | |
1556 | } | |
1557 | } | |
1558 | } | |
1559 | } | |
1560 | ||
f038dae6 ILT |
// CountTests lists cases for Count: num is the expected number of
// non-overlapping occurrences of sep in s.
var CountTests = []struct {
	s, sep string
	num    int
}{
	{"", "", 1},
	{"", "notempty", 0},
	{"notempty", "", 9},
	{"smaller", "not smaller", 0},
	{"12345678987654321", "6", 2},
	{"611161116", "6", 3},
	{"notequal", "NotEqual", 0},
	{"equal", "equal", 1},
	{"abc1231231123q", "123", 3},
	{"11111", "11", 2},
}

// TestCount checks Count against CountTests.
func TestCount(t *testing.T) {
	for _, tt := range CountTests {
		got := Count(tt.s, tt.sep)
		if got == tt.num {
			continue
		}
		t.Errorf("Count(\"%s\", \"%s\") = %d, want %d", tt.s, tt.sep, got, tt.num)
	}
}
1584 | ||
be47d6ec ILT |
// makeBenchInputHard builds a string just under 1<<20 bytes long by
// concatenating randomly chosen HTML-like tokens. It stops at the first
// token that would bring the length to 1<<20 or more.
func makeBenchInputHard() string {
	const limit = 1 << 20
	tokens := [...]string{
		"<a>", "<p>", "<b>", "<strong>",
		"</a>", "</p>", "</b>", "</strong>",
		"hello", "world",
	}
	buf := make([]byte, 0, limit)
	for {
		tok := tokens[rand.Intn(len(tokens))]
		if len(buf)+len(tok) >= limit {
			return string(buf)
		}
		buf = append(buf, tok...)
	}
}

// benchInputHard is the shared haystack for the "Hard" benchmarks.
var benchInputHard = makeBenchInputHard()
1603 | ||
1604 | func benchmarkIndexHard(b *testing.B, sep string) { | |
1605 | for i := 0; i < b.N; i++ { | |
1606 | Index(benchInputHard, sep) | |
1607 | } | |
1608 | } | |
1609 | ||
f8d9fa9e ILT |
1610 | func benchmarkLastIndexHard(b *testing.B, sep string) { |
1611 | for i := 0; i < b.N; i++ { | |
1612 | LastIndex(benchInputHard, sep) | |
1613 | } | |
1614 | } | |
1615 | ||
be47d6ec ILT |
1616 | func benchmarkCountHard(b *testing.B, sep string) { |
1617 | for i := 0; i < b.N; i++ { | |
1618 | Count(benchInputHard, sep) | |
1619 | } | |
1620 | } | |
1621 | ||
1622 | func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") } | |
1623 | func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") } | |
1624 | func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") } | |
c2047754 ILT |
1625 | func BenchmarkIndexHard4(b *testing.B) { |
1626 | benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>") | |
1627 | } | |
be47d6ec | 1628 | |
f8d9fa9e ILT |
1629 | func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") } |
1630 | func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") } | |
1631 | func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") } | |
1632 | ||
be47d6ec ILT |
1633 | func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") } |
1634 | func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") } | |
1635 | func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") } | |
1636 | ||
// Torture inputs: the haystack is 1<<10 copies of "ABC", then "123", then
// another 1<<10 copies of "ABC"; the needle is 1<<10+1 copies of "ABC", one
// more than either run in the haystack.
var (
	benchInputTorture  = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
	benchNeedleTorture = Repeat("ABC", 1<<10+1)
)

// BenchmarkIndexTorture times Index on the torture inputs.
func BenchmarkIndexTorture(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Index(benchInputTorture, benchNeedleTorture)
	}
}

// BenchmarkCountTorture times Count on the torture inputs.
func BenchmarkCountTorture(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Count(benchInputTorture, benchNeedleTorture)
	}
}
1651 | ||
// BenchmarkCountTortureOverlapping times Count where the haystack is 1<<20
// copies of "ABC" and the needle is 1<<10 copies of "ABC".
func BenchmarkCountTortureOverlapping(b *testing.B) {
	haystack := Repeat("ABC", 1<<20)
	needle := Repeat("ABC", 1<<10)
	for n := 0; n < b.N; n++ {
		Count(haystack, needle)
	}
}
1659 | ||
bc998d03 ILT |
1660 | func BenchmarkCountByte(b *testing.B) { |
1661 | indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20} | |
1662 | benchStr := Repeat(benchmarkString, | |
1663 | (indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString)) | |
1664 | benchFunc := func(b *testing.B, benchStr string) { | |
1665 | b.SetBytes(int64(len(benchStr))) | |
1666 | for i := 0; i < b.N; i++ { | |
1667 | Count(benchStr, "=") | |
1668 | } | |
1669 | } | |
1670 | for _, size := range indexSizes { | |
1671 | b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { | |
1672 | benchFunc(b, benchStr[:size]) | |
1673 | }) | |
1674 | } | |
1675 | ||
1676 | } | |
1677 | ||
4ccad563 ILT |
// makeFieldsInput returns a 1<<20-byte random string for the Fields
// benchmarks, filled position by position from one random draw each.
var makeFieldsInput = func() string {
	buf := make([]byte, 1<<20)
	// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
	for i := range buf {
		switch draw := rand.Intn(10); {
		case draw == 0:
			buf[i] = ' '
		case draw == 1 && i > 0 && buf[i-1] == 'x':
			// Overwrite the previous 'x' and this byte with the two-byte rune.
			copy(buf[i-1:], "χ")
		default:
			buf[i] = 'x'
		}
	}
	return string(buf)
}
1697 | ||
bc998d03 ILT |
// makeFieldsInputASCII returns a 1<<20-byte random string made only of
// spaces and the letter 'x'.
var makeFieldsInputASCII = func() string {
	buf := make([]byte, 1<<20)
	// Input is ~10% space, rest ASCII non-space.
	for i := range buf {
		c := byte('x')
		if rand.Intn(10) == 0 {
			c = ' '
		}
		buf[i] = c
	}
	return string(buf)
}
1710 | ||
1711 | var stringdata = []struct{ name, data string }{ | |
1712 | {"ASCII", makeFieldsInputASCII()}, | |
1713 | {"Mixed", makeFieldsInput()}, | |
1714 | } | |
4ccad563 ILT |
1715 | |
1716 | func BenchmarkFields(b *testing.B) { | |
bc998d03 ILT |
1717 | for _, sd := range stringdata { |
1718 | b.Run(sd.name, func(b *testing.B) { | |
1719 | for j := 1 << 4; j <= 1<<20; j <<= 4 { | |
1720 | b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { | |
1721 | b.ReportAllocs() | |
1722 | b.SetBytes(int64(j)) | |
1723 | data := sd.data[:j] | |
1724 | for i := 0; i < b.N; i++ { | |
1725 | Fields(data) | |
1726 | } | |
1727 | }) | |
1728 | } | |
1729 | }) | |
4ccad563 ILT |
1730 | } |
1731 | } | |
1732 | ||
1733 | func BenchmarkFieldsFunc(b *testing.B) { | |
bc998d03 ILT |
1734 | for _, sd := range stringdata { |
1735 | b.Run(sd.name, func(b *testing.B) { | |
1736 | for j := 1 << 4; j <= 1<<20; j <<= 4 { | |
1737 | b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { | |
1738 | b.ReportAllocs() | |
1739 | b.SetBytes(int64(j)) | |
1740 | data := sd.data[:j] | |
1741 | for i := 0; i < b.N; i++ { | |
1742 | FieldsFunc(data, unicode.IsSpace) | |
1743 | } | |
1744 | }) | |
1745 | } | |
1746 | }) | |
4ccad563 ILT |
1747 | } |
1748 | } | |
be47d6ec | 1749 | |
bc998d03 | 1750 | func BenchmarkSplitEmptySeparator(b *testing.B) { |
be47d6ec ILT |
1751 | for i := 0; i < b.N; i++ { |
1752 | Split(benchInputHard, "") | |
1753 | } | |
1754 | } | |
1755 | ||
bc998d03 | 1756 | func BenchmarkSplitSingleByteSeparator(b *testing.B) { |
be47d6ec ILT |
1757 | for i := 0; i < b.N; i++ { |
1758 | Split(benchInputHard, "/") | |
1759 | } | |
1760 | } | |
1761 | ||
bc998d03 | 1762 | func BenchmarkSplitMultiByteSeparator(b *testing.B) { |
be47d6ec ILT |
1763 | for i := 0; i < b.N; i++ { |
1764 | Split(benchInputHard, "hello") | |
1765 | } | |
1766 | } | |
f8d9fa9e | 1767 | |
bc998d03 ILT |
1768 | func BenchmarkSplitNSingleByteSeparator(b *testing.B) { |
1769 | for i := 0; i < b.N; i++ { | |
1770 | SplitN(benchInputHard, "/", 10) | |
1771 | } | |
1772 | } | |
1773 | ||
1774 | func BenchmarkSplitNMultiByteSeparator(b *testing.B) { | |
1775 | for i := 0; i < b.N; i++ { | |
1776 | SplitN(benchInputHard, "hello", 10) | |
1777 | } | |
1778 | } | |
1779 | ||
// BenchmarkRepeat times Repeat for each combination of a 5- or 10-byte digit
// string and a repeat count of 1, 2 or 6. Sub-benchmarks are named
// "<length>x<count>".
func BenchmarkRepeat(b *testing.B) {
	const digits = "0123456789"
	lengths := [...]int{5, 10}
	counts := [...]int{1, 2, 6}
	for _, length := range lengths {
		for _, count := range counts {
			b.Run(fmt.Sprintf("%dx%d", length, count), func(b *testing.B) {
				unit := digits[:length]
				for i := 0; i < b.N; i++ {
					Repeat(unit, count)
				}
			})
		}
	}
}
c2047754 ILT |
1792 | |
// BenchmarkIndexAnyASCII times IndexAny on an all-'#' string against prefixes
// of the hex-digit set, so the searched string never contains a character from
// the set. The string length grows 1, 16, 256, 4096 and the set size grows
// 1, 2, 4, 8, 16; sub-benchmarks are named "<string length>:<set size>".
func BenchmarkIndexAnyASCII(b *testing.B) {
	const (
		maxLen    = 4096
		hexDigits = "0123456789abcdef"
	)
	haystack := Repeat("#", maxLen) // never matches the set
	for n := 1; n <= maxLen; n *= 16 {
		for m := 1; m <= len(hexDigits); m *= 2 {
			b.Run(fmt.Sprintf("%d:%d", n, m), func(b *testing.B) {
				s, chars := haystack[:n], hexDigits[:m]
				for i := 0; i < b.N; i++ {
					IndexAny(s, chars)
				}
			})
		}
	}
}
1806 | ||
// BenchmarkTrimASCII times Trim on strings made up entirely of characters from
// the cutset. The cutset is a prefix of the hex digits with 1, 2, 4, 8 or 16
// characters, and the trimmed string is 1, 16, 256 or 4096 bytes long.
// Sub-benchmarks are named "<string length>:<cutset size>".
func BenchmarkTrimASCII(b *testing.B) {
	cs := "0123456789abcdef"
	for k := 1; k <= 4096; k <<= 4 {
		for j := 1; j <= 16; j <<= 1 {
			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
				x := Repeat(cs[:j], k) // Always matches set
				// Building x is setup rather than the operation being measured.
				// Restart the timer and allocation counters so that only the
				// Trim calls below contribute to the reported figures.
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					Trim(x[:k], cs[:j])
				}
			})
		}
	}
}
1a2f01ef ILT |
1820 | |
// BenchmarkIndexPeriodic times Index searching for "aa" in a 1<<16-byte string
// built by repeating a single 'a' followed by skip-1 spaces, for skip values
// 2, 4, 8, 16, 32 and 64. Two adjacent 'a' bytes never occur in that text, so
// the key is never found. Sub-benchmarks are named "IndexPeriodic<skip>".
func BenchmarkIndexPeriodic(b *testing.B) {
	key := "aa"
	for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
		b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
			s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip)
			// Building s is setup rather than the operation being measured.
			// Restart the timer and allocation counters so that only the
			// Index calls below contribute to the reported figures.
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				Index(s, key)
			}
		})
	}
}
4f4a855d ILT |
1832 | |
// BenchmarkJoin times Join with the separator " and " on the first 0 through 7
// entries of a fixed list of colour names. Sub-benchmarks are named after the
// number of entries joined and report allocations.
func BenchmarkJoin(b *testing.B) {
	const sep = " and "
	colors := []string{"red", "yellow", "pink", "green", "purple", "orange", "blue"}
	for count := 0; count <= len(colors); count++ {
		b.Run(strconv.Itoa(count), func(b *testing.B) {
			b.ReportAllocs()
			elems := colors[:count]
			for i := 0; i < b.N; i++ {
				Join(elems, sep)
			}
		})
	}
}
aa8901e9 ILT |
1845 | |
// BenchmarkTrimSpace times TrimSpace on four fixed inputs, one sub-benchmark
// per input: a string with nothing to trim, one padded with ASCII spaces, one
// mixing ASCII and non-ASCII white space, and one padded only with non-ASCII
// white space.
func BenchmarkTrimSpace(b *testing.B) {
	cases := []struct{ name, input string }{
		{name: "NoTrim", input: "typical"},
		{name: "ASCII", input: " foo bar "},
		{name: "SomeNonASCII", input: " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
		{name: "JustNonASCII", input: "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
	}
	for idx := range cases {
		tc := cases[idx]
		b.Run(tc.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				TrimSpace(tc.input)
			}
		})
	}
}