]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgo/go/scanner/scanner_test.go
Add Go frontend, libgo library, and Go testsuite.
[thirdparty/gcc.git] / libgo / go / scanner / scanner_test.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package scanner
6
7 import (
8 "bytes"
9 "fmt"
10 "os"
11 "strings"
12 "testing"
13 )
14
15
16 // A StringReader delivers its data one string segment at a time via Read.
17 type StringReader struct {
18 data []string
19 step int
20 }
21
22
23 func (r *StringReader) Read(p []byte) (n int, err os.Error) {
24 if r.step < len(r.data) {
25 s := r.data[r.step]
26 n = copy(p, s)
27 r.step++
28 } else {
29 err = os.EOF
30 }
31 return
32 }
33
34
35 func readRuneSegments(t *testing.T, segments []string) {
36 got := ""
37 want := strings.Join(segments, "")
38 s := new(Scanner).Init(&StringReader{data: segments})
39 for {
40 ch := s.Next()
41 if ch == EOF {
42 break
43 }
44 got += string(ch)
45 }
46 if got != want {
47 t.Errorf("segments=%v got=%s want=%s", segments, got, want)
48 }
49 }
50
51
// segmentList holds the inputs for TestNext. Each entry is a sequence of
// string segments that readRuneSegments hands to the Scanner through a
// StringReader.
var segmentList = [][]string{
	{},   // no segments at all
	{""}, // a single empty segment
	{"日", "本語"},
	{"\u65e5", "\u672c", "\u8a9e"},
	{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
	{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"}, // segment boundaries fall inside multi-byte UTF-8 sequences
	{"Hello", ", ", "World", "!"},
	{"Hello", ", ", "", "World", "!"}, // empty segment in the middle
}
62
63
64 func TestNext(t *testing.T) {
65 for _, s := range segmentList {
66 readRuneSegments(t, s)
67 }
68 }
69
70
// A token pairs an expected token value with the source text that is
// written into the generated test input for it.
type token struct {
	tok  int    // expected token: a class such as Ident, or the character itself
	text string // source text of the token
}
75
// f100 is a run of 100 'f' characters, used to build long comments,
// identifiers, hexadecimal literals and strings in tokenList.
var f100 = strings.Repeat("f", 100)
77
// tokenList is the table of tokens shared by the scanning tests: makeSource
// writes each text into the generated source, and the tests compare what
// the Scanner reports against tok and text. The line-comment entries that
// name a category ("// identifiers\n", ...) double as section headings.
var tokenList = []token{
	{Comment, "// line comments\n"},
	{Comment, "//\n"},
	{Comment, "////\n"},
	{Comment, "// comment\n"},
	{Comment, "// /* comment */\n"},
	{Comment, "// // comment //\n"},
	{Comment, "//" + f100 + "\n"},

	{Comment, "// general comments\n"},
	{Comment, "/**/"},
	{Comment, "/***/"},
	{Comment, "/* comment */"},
	{Comment, "/* // comment */"},
	{Comment, "/* /* comment */"},
	{Comment, "/*\n comment\n*/"},
	{Comment, "/*" + f100 + "*/"},

	{Comment, "// identifiers\n"},
	{Ident, "a"},
	{Ident, "a0"},
	{Ident, "foobar"},
	{Ident, "abc123"},
	{Ident, "LGTM"},
	{Ident, "_"},
	{Ident, "_abc123"},
	{Ident, "abc123_"},
	{Ident, "_abc_123_"},
	{Ident, "_äöü"},
	{Ident, "_本"},
	// TODO for unknown reasons these fail when checking the literals
	/*
		token{Ident, "äöü"},
		token{Ident, "本"},
	*/
	{Ident, "a۰۱۸"},
	{Ident, "foo६४"},
	{Ident, "bar9876"},
	{Ident, f100},

	{Comment, "// decimal ints\n"},
	{Int, "0"},
	{Int, "1"},
	{Int, "9"},
	{Int, "42"},
	{Int, "1234567890"},

	{Comment, "// octal ints\n"},
	{Int, "00"},
	{Int, "01"},
	{Int, "07"},
	{Int, "042"},
	{Int, "01234567"},

	{Comment, "// hexadecimal ints\n"},
	{Int, "0x0"},
	{Int, "0x1"},
	{Int, "0xf"},
	{Int, "0x42"},
	{Int, "0x123456789abcDEF"},
	{Int, "0x" + f100},
	{Int, "0X0"},
	{Int, "0X1"},
	{Int, "0XF"},
	{Int, "0X42"},
	{Int, "0X123456789abcDEF"},
	{Int, "0X" + f100},

	{Comment, "// floats\n"},
	{Float, "0."},
	{Float, "1."},
	{Float, "42."},
	{Float, "01234567890."},
	{Float, ".0"},
	{Float, ".1"},
	{Float, ".42"},
	{Float, ".0123456789"},
	{Float, "0.0"},
	{Float, "1.0"},
	{Float, "42.0"},
	{Float, "01234567890.0"},
	{Float, "0e0"},
	{Float, "1e0"},
	{Float, "42e0"},
	{Float, "01234567890e0"},
	{Float, "0E0"},
	{Float, "1E0"},
	{Float, "42E0"},
	{Float, "01234567890E0"},
	{Float, "0e+10"},
	{Float, "1e-10"},
	{Float, "42e+10"},
	{Float, "01234567890e-10"},
	{Float, "0E+10"},
	{Float, "1E-10"},
	{Float, "42E+10"},
	{Float, "01234567890E-10"},

	{Comment, "// chars\n"},
	{Char, `' '`},
	{Char, `'a'`},
	{Char, `'本'`},
	{Char, `'\a'`},
	{Char, `'\b'`},
	{Char, `'\f'`},
	{Char, `'\n'`},
	{Char, `'\r'`},
	{Char, `'\t'`},
	{Char, `'\v'`},
	{Char, `'\''`},
	{Char, `'\000'`},
	{Char, `'\777'`},
	{Char, `'\x00'`},
	{Char, `'\xff'`},
	{Char, `'\u0000'`},
	{Char, `'\ufA16'`},
	{Char, `'\U00000000'`},
	{Char, `'\U0000ffAB'`},

	{Comment, "// strings\n"},
	{String, `" "`},
	{String, `"a"`},
	{String, `"本"`},
	{String, `"\a"`},
	{String, `"\b"`},
	{String, `"\f"`},
	{String, `"\n"`},
	{String, `"\r"`},
	{String, `"\t"`},
	{String, `"\v"`},
	{String, `"\""`},
	{String, `"\000"`},
	{String, `"\777"`},
	{String, `"\x00"`},
	{String, `"\xff"`},
	{String, `"\u0000"`},
	{String, `"\ufA16"`},
	{String, `"\U00000000"`},
	{String, `"\U0000ffAB"`},
	{String, `"` + f100 + `"`},

	{Comment, "// raw strings\n"},
	{String, "``"},
	{String, "`\\`"},
	{String, "`" + "\n\n/* foobar */\n\n" + "`"},
	{String, "`" + f100 + "`"},

	{Comment, "// individual characters\n"},
	// NUL character is not allowed
	{'\x01', "\x01"},
	{' ' - 1, string(' ' - 1)},
	{'+', "+"},
	{'/', "/"},
	{'.', "."},
	{'~', "~"},
	{'(', "("},
}
235
236
237 func makeSource(pattern string) *bytes.Buffer {
238 var buf bytes.Buffer
239 for _, k := range tokenList {
240 fmt.Fprintf(&buf, pattern, k.text)
241 }
242 return &buf
243 }
244
245
246 func checkTok(t *testing.T, s *Scanner, line, got, want int, text string) {
247 if got != want {
248 t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
249 }
250 if s.Line != line {
251 t.Errorf("line = %d, want %d for %q", s.Line, line, text)
252 }
253 stext := s.TokenText()
254 if stext != text {
255 t.Errorf("text = %q, want %q", stext, text)
256 } else {
257 // check idempotency of TokenText() call
258 stext = s.TokenText()
259 if stext != text {
260 t.Errorf("text = %q, want %q (idempotency check)", stext, text)
261 }
262 }
263 }
264
265
// countNewlines returns the number of newline characters in s.
func countNewlines(s string) int {
	return strings.Count(s, "\n")
}
275
276
277 func testScan(t *testing.T, mode uint) {
278 s := new(Scanner).Init(makeSource(" \t%s\t\n\r"))
279 s.Mode = mode
280 tok := s.Scan()
281 line := 1
282 for _, k := range tokenList {
283 if mode&SkipComments == 0 || k.tok != Comment {
284 checkTok(t, s, line, tok, k.tok, k.text)
285 tok = s.Scan()
286 }
287 line += countNewlines(k.text) + 1 // each token is on a new line
288 }
289 checkTok(t, s, line, tok, -1, "")
290 }
291
292
293 func TestScan(t *testing.T) {
294 testScan(t, GoTokens)
295 testScan(t, GoTokens&^SkipComments)
296 }
297
298
299 func TestPosition(t *testing.T) {
300 src := makeSource("\t\t\t\t%s\n")
301 s := new(Scanner).Init(src)
302 s.Mode = GoTokens &^ SkipComments
303 s.Scan()
304 pos := Position{"", 4, 1, 5}
305 for _, k := range tokenList {
306 if s.Offset != pos.Offset {
307 t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
308 }
309 if s.Line != pos.Line {
310 t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
311 }
312 if s.Column != pos.Column {
313 t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
314 }
315 pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline
316 pos.Line += countNewlines(k.text) + 1 // each token is on a new line
317 s.Scan()
318 }
319 }
320
321
322 func TestScanZeroMode(t *testing.T) {
323 src := makeSource("%s\n")
324 str := src.String()
325 s := new(Scanner).Init(src)
326 s.Mode = 0 // don't recognize any token classes
327 s.Whitespace = 0 // don't skip any whitespace
328 tok := s.Scan()
329 for i, ch := range str {
330 if tok != ch {
331 t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
332 }
333 tok = s.Scan()
334 }
335 if tok != EOF {
336 t.Fatalf("tok = %s, want EOF", TokenString(tok))
337 }
338 }
339
340
341 func testScanSelectedMode(t *testing.T, mode uint, class int) {
342 src := makeSource("%s\n")
343 s := new(Scanner).Init(src)
344 s.Mode = mode
345 tok := s.Scan()
346 for tok != EOF {
347 if tok < 0 && tok != class {
348 t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
349 }
350 tok = s.Scan()
351 }
352 }
353
354
355 func TestScanSelectedMask(t *testing.T) {
356 testScanSelectedMode(t, 0, 0)
357 testScanSelectedMode(t, ScanIdents, Ident)
358 // Don't test ScanInts and ScanNumbers since some parts of
359 // the floats in the source look like (illegal) octal ints
360 // and ScanNumbers may return either Int or Float.
361 testScanSelectedMode(t, ScanChars, Char)
362 testScanSelectedMode(t, ScanStrings, String)
363 testScanSelectedMode(t, SkipComments, 0)
364 testScanSelectedMode(t, ScanComments, Comment)
365 }
366
367
368 func TestScanNext(t *testing.T) {
369 s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n}"))
370 checkTok(t, s, 1, s.Scan(), Ident, "if")
371 checkTok(t, s, 1, s.Scan(), Ident, "a")
372 checkTok(t, s, 1, s.Scan(), '=', "=")
373 checkTok(t, s, 1, s.Next(), '=', "")
374 checkTok(t, s, 1, s.Next(), ' ', "")
375 checkTok(t, s, 1, s.Next(), 'b', "")
376 checkTok(t, s, 1, s.Scan(), Ident, "cd")
377 checkTok(t, s, 1, s.Scan(), '{', "{")
378 checkTok(t, s, 2, s.Scan(), Ident, "a")
379 checkTok(t, s, 2, s.Scan(), '+', "+")
380 checkTok(t, s, 2, s.Next(), '=', "")
381 checkTok(t, s, 2, s.Scan(), Ident, "c")
382 checkTok(t, s, 3, s.Scan(), '}', "}")
383 checkTok(t, s, 3, s.Scan(), -1, "")
384 }
385
386
387 func TestScanWhitespace(t *testing.T) {
388 var buf bytes.Buffer
389 var ws uint64
390 // start at 1, NUL character is not allowed
391 for ch := byte(1); ch < ' '; ch++ {
392 buf.WriteByte(ch)
393 ws |= 1 << ch
394 }
395 const orig = 'x'
396 buf.WriteByte(orig)
397
398 s := new(Scanner).Init(&buf)
399 s.Mode = 0
400 s.Whitespace = ws
401 tok := s.Scan()
402 if tok != orig {
403 t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
404 }
405 }
406
407
408 func testError(t *testing.T, src, msg string, tok int) {
409 s := new(Scanner).Init(bytes.NewBufferString(src))
410 errorCalled := false
411 s.Error = func(s *Scanner, m string) {
412 if !errorCalled {
413 // only look at first error
414 if m != msg {
415 t.Errorf("msg = %q, want %q for %q", m, msg, src)
416 }
417 errorCalled = true
418 }
419 }
420 tk := s.Scan()
421 if tk != tok {
422 t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
423 }
424 if !errorCalled {
425 t.Errorf("error handler not called for %q", src)
426 }
427 if s.ErrorCount == 0 {
428 t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
429 }
430 }
431
432
433 func TestError(t *testing.T) {
434 testError(t, `01238`, "illegal octal number", Int)
435 testError(t, `'\"'`, "illegal char escape", Char)
436 testError(t, `'aa'`, "illegal char literal", Char)
437 testError(t, `'`, "literal not terminated", Char)
438 testError(t, `"\'"`, "illegal char escape", String)
439 testError(t, `"abc`, "literal not terminated", String)
440 testError(t, "`abc", "literal not terminated", String)
441 testError(t, `//`, "comment not terminated", EOF)
442 testError(t, `/*/`, "comment not terminated", EOF)
443 testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String)
444 testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String)
445 }
446
447
448 func checkPos(t *testing.T, s *Scanner, offset, line, column, char int) {
449 pos := s.Pos()
450 if pos.Offset != offset {
451 t.Errorf("offset = %d, want %d", pos.Offset, offset)
452 }
453 if pos.Line != line {
454 t.Errorf("line = %d, want %d", pos.Line, line)
455 }
456 if pos.Column != column {
457 t.Errorf("column = %d, want %d", pos.Column, column)
458 }
459 ch := s.Scan()
460 if ch != char {
461 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
462 }
463 }
464
465
466 func TestPos(t *testing.T) {
467 s := new(Scanner).Init(bytes.NewBufferString("abc\n012\n\nx"))
468 s.Mode = 0
469 s.Whitespace = 0
470 checkPos(t, s, 0, 1, 1, 'a')
471 checkPos(t, s, 1, 1, 2, 'b')
472 checkPos(t, s, 2, 1, 3, 'c')
473 checkPos(t, s, 3, 2, 0, '\n')
474 checkPos(t, s, 4, 2, 1, '0')
475 checkPos(t, s, 5, 2, 2, '1')
476 checkPos(t, s, 6, 2, 3, '2')
477 checkPos(t, s, 7, 3, 0, '\n')
478 checkPos(t, s, 8, 4, 0, '\n')
479 checkPos(t, s, 9, 4, 1, 'x')
480 checkPos(t, s, 9, 4, 1, EOF)
481 checkPos(t, s, 9, 4, 1, EOF) // after EOF, position doesn't change
482 }