1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 // A StringReader delivers its data one string segment at a time via Read.
17 type StringReader struct {
23 func (r *StringReader) Read(p []byte) (n int, err os.Error) {
24 if r.step < len(r.data) {
35 func readRuneSegments(t *testing.T, segments []string) {
37 want := strings.Join(segments, "")
38 s := new(Scanner).Init(&StringReader{data: segments})
47 t.Errorf("segments=%v got=%s want=%s", segments, got, want)
52 var segmentList = [][]string{
56 {"\u65e5", "\u672c", "\u8a9e"},
57 {"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
58 {"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
59 {"Hello", ", ", "World", "!"},
60 {"Hello", ", ", "", "World", "!"},
64 func TestNext(t *testing.T) {
65 for _, s := range segmentList {
66 readRuneSegments(t, s)
76 var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
78 var tokenList = []token{
79 {Comment, "// line comments\n"},
82 {Comment, "// comment\n"},
83 {Comment, "// /* comment */\n"},
84 {Comment, "// // comment //\n"},
85 {Comment, "//" + f100 + "\n"},
87 {Comment, "// general comments\n"},
90 {Comment, "/* comment */"},
91 {Comment, "/* // comment */"},
92 {Comment, "/* /* comment */"},
93 {Comment, "/*\n comment\n*/"},
94 {Comment, "/*" + f100 + "*/"},
96 {Comment, "// identifiers\n"},
105 {Ident, "_abc_123_"},
108 // TODO: For unknown reasons, these fail when checking the literals.
118 {Comment, "// decimal ints\n"},
125 {Comment, "// octal ints\n"},
132 {Comment, "// hexadecimal ints\n"},
137 {Int, "0x123456789abcDEF"},
143 {Int, "0X123456789abcDEF"},
146 {Comment, "// floats\n"},
150 {Float, "01234567890."},
154 {Float, ".0123456789"},
158 {Float, "01234567890.0"},
162 {Float, "01234567890e0"},
166 {Float, "01234567890E0"},
170 {Float, "01234567890e-10"},
174 {Float, "01234567890E-10"},
176 {Comment, "// chars\n"},
194 {Char, `'\U00000000'`},
195 {Char, `'\U0000ffAB'`},
197 {Comment, "// strings\n"},
213 {String, `"\u0000"`},
214 {String, `"\ufA16"`},
215 {String, `"\U00000000"`},
216 {String, `"\U0000ffAB"`},
217 {String, `"` + f100 + `"`},
219 {Comment, "// raw strings\n"},
222 {String, "`" + "\n\n/* foobar */\n\n" + "`"},
223 {String, "`" + f100 + "`"},
225 {Comment, "// individual characters\n"},
226 // NUL character is not allowed
228 {' ' - 1, string(' ' - 1)},
237 func makeSource(pattern string) *bytes.Buffer {
239 for _, k := range tokenList {
240 fmt.Fprintf(&buf, pattern, k.text)
246 func checkTok(t *testing.T, s *Scanner, line, got, want int, text string) {
248 t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
251 t.Errorf("line = %d, want %d for %q", s.Line, line, text)
253 stext := s.TokenText()
255 t.Errorf("text = %q, want %q", stext, text)
257 // check idempotency of TokenText() call
258 stext = s.TokenText()
260 t.Errorf("text = %q, want %q (idempotency check)", stext, text)
266 func countNewlines(s string) int {
268 for _, ch := range s {
277 func testScan(t *testing.T, mode uint) {
278 s := new(Scanner).Init(makeSource(" \t%s\t\n\r"))
282 for _, k := range tokenList {
283 if mode&SkipComments == 0 || k.tok != Comment {
284 checkTok(t, s, line, tok, k.tok, k.text)
287 line += countNewlines(k.text) + 1 // each token is on a new line
289 checkTok(t, s, line, tok, -1, "")
293 func TestScan(t *testing.T) {
294 testScan(t, GoTokens)
295 testScan(t, GoTokens&^SkipComments)
299 func TestPosition(t *testing.T) {
300 src := makeSource("\t\t\t\t%s\n")
301 s := new(Scanner).Init(src)
302 s.Mode = GoTokens &^ SkipComments
304 pos := Position{"", 4, 1, 5}
305 for _, k := range tokenList {
306 if s.Offset != pos.Offset {
307 t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
309 if s.Line != pos.Line {
310 t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
312 if s.Column != pos.Column {
313 t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
315 pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline
316 pos.Line += countNewlines(k.text) + 1 // each token is on a new line
322 func TestScanZeroMode(t *testing.T) {
323 src := makeSource("%s\n")
325 s := new(Scanner).Init(src)
326 s.Mode = 0 // don't recognize any token classes
327 s.Whitespace = 0 // don't skip any whitespace
329 for i, ch := range str {
331 t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
336 t.Fatalf("tok = %s, want EOF", TokenString(tok))
341 func testScanSelectedMode(t *testing.T, mode uint, class int) {
342 src := makeSource("%s\n")
343 s := new(Scanner).Init(src)
347 if tok < 0 && tok != class {
348 t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
355 func TestScanSelectedMask(t *testing.T) {
356 testScanSelectedMode(t, 0, 0)
357 testScanSelectedMode(t, ScanIdents, Ident)
358 // Don't test ScanInts and ScanNumbers since some parts of
359 // the floats in the source look like (illegal) octal ints
360 // and ScanNumbers may return either Int or Float.
361 testScanSelectedMode(t, ScanChars, Char)
362 testScanSelectedMode(t, ScanStrings, String)
363 testScanSelectedMode(t, SkipComments, 0)
364 testScanSelectedMode(t, ScanComments, Comment)
368 func TestScanNext(t *testing.T) {
369 s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n}"))
370 checkTok(t, s, 1, s.Scan(), Ident, "if")
371 checkTok(t, s, 1, s.Scan(), Ident, "a")
372 checkTok(t, s, 1, s.Scan(), '=', "=")
373 checkTok(t, s, 1, s.Next(), '=', "")
374 checkTok(t, s, 1, s.Next(), ' ', "")
375 checkTok(t, s, 1, s.Next(), 'b', "")
376 checkTok(t, s, 1, s.Scan(), Ident, "cd")
377 checkTok(t, s, 1, s.Scan(), '{', "{")
378 checkTok(t, s, 2, s.Scan(), Ident, "a")
379 checkTok(t, s, 2, s.Scan(), '+', "+")
380 checkTok(t, s, 2, s.Next(), '=', "")
381 checkTok(t, s, 2, s.Scan(), Ident, "c")
382 checkTok(t, s, 3, s.Scan(), '}', "}")
383 checkTok(t, s, 3, s.Scan(), -1, "")
387 func TestScanWhitespace(t *testing.T) {
390 // start at 1, NUL character is not allowed
391 for ch := byte(1); ch < ' '; ch++ {
398 s := new(Scanner).Init(&buf)
403 t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
408 func testError(t *testing.T, src, msg string, tok int) {
409 s := new(Scanner).Init(bytes.NewBufferString(src))
411 s.Error = func(s *Scanner, m string) {
413 // only look at first error
415 t.Errorf("msg = %q, want %q for %q", m, msg, src)
422 t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
425 t.Errorf("error handler not called for %q", src)
427 if s.ErrorCount == 0 {
428 t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
433 func TestError(t *testing.T) {
434 testError(t, `01238`, "illegal octal number", Int)
435 testError(t, `'\"'`, "illegal char escape", Char)
436 testError(t, `'aa'`, "illegal char literal", Char)
437 testError(t, `'`, "literal not terminated", Char)
438 testError(t, `"\'"`, "illegal char escape", String)
439 testError(t, `"abc`, "literal not terminated", String)
440 testError(t, "`abc", "literal not terminated", String)
441 testError(t, `//`, "comment not terminated", EOF)
442 testError(t, `/*/`, "comment not terminated", EOF)
443 testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String)
444 testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String)
448 func checkPos(t *testing.T, s *Scanner, offset, line, column, char int) {
450 if pos.Offset != offset {
451 t.Errorf("offset = %d, want %d", pos.Offset, offset)
453 if pos.Line != line {
454 t.Errorf("line = %d, want %d", pos.Line, line)
456 if pos.Column != column {
457 t.Errorf("column = %d, want %d", pos.Column, column)
461 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
466 func TestPos(t *testing.T) {
467 s := new(Scanner).Init(bytes.NewBufferString("abc\n012\n\nx"))
470 checkPos(t, s, 0, 1, 1, 'a')
471 checkPos(t, s, 1, 1, 2, 'b')
472 checkPos(t, s, 2, 1, 3, 'c')
473 checkPos(t, s, 3, 2, 0, '\n')
474 checkPos(t, s, 4, 2, 1, '0')
475 checkPos(t, s, 5, 2, 2, '1')
476 checkPos(t, s, 6, 2, 3, '2')
477 checkPos(t, s, 7, 3, 0, '\n')
478 checkPos(t, s, 8, 4, 0, '\n')
479 checkPos(t, s, 9, 4, 1, 'x')
480 checkPos(t, s, 9, 4, 1, EOF)
481 checkPos(t, s, 9, 4, 1, EOF) // after EOF, position doesn't change