]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgo/go/encoding/json/scanner.go
libgo: update to Go1.14beta1
[thirdparty/gcc.git] / libgo / go / encoding / json / scanner.go
CommitLineData
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package json
6
// JSON value parser state machine.
// Just about at the limit of what is reasonable to write by hand.
// Some parts are a bit tedious, but overall it nicely factors out the
// otherwise common code from the multiple scanning functions
// in this package (Compact, Indent, checkValid, etc).
//
// This file starts with two simple examples using the scanner
// before diving into the scanner itself.
15
5a8ea165
ILT
16import (
17 "strconv"
18 "sync"
19)
7a938933 20
bc998d03
ILT
21// Valid reports whether data is a valid JSON encoding.
22func Valid(data []byte) bool {
5a8ea165
ILT
23 scan := newScanner()
24 defer freeScanner(scan)
25 return checkValid(data, scan) == nil
bc998d03
ILT
26}
27
7a938933
ILT
28// checkValid verifies that data is valid JSON-encoded data.
29// scan is passed in for use by checkValid to avoid an allocation.
2fd401c8 30func checkValid(data []byte, scan *scanner) error {
7a938933
ILT
31 scan.reset()
32 for _, c := range data {
9ff56c95 33 scan.bytes++
f98dd1a3 34 if scan.step(scan, c) == scanError {
7a938933
ILT
35 return scan.err
36 }
37 }
38 if scan.eof() == scanError {
39 return scan.err
40 }
41 return nil
42}
43
// A SyntaxError is a description of a JSON syntax error.
type SyntaxError struct {
	msg    string // description of error
	Offset int64  // error occurred after reading Offset bytes
}

// Error returns the description of the syntax error, which makes
// *SyntaxError satisfy the error interface.
func (e *SyntaxError) Error() string { return e.msg }
7a938933
ILT
51
// A scanner is a JSON scanning state machine.
// Callers call scan.reset and then pass bytes in one at a time
// by calling scan.step(scan, c) for each byte, where scan is a *scanner.
// The return value, referred to as an opcode, tells the
// caller about significant parsing events like beginning
// and ending literals, objects, and arrays, so that the
// caller can follow along if it wishes.
// The return value scanEnd indicates that a single top-level
// JSON value has been completed, *before* the byte that
// just got passed in. (The indication must be delayed in order
// to recognize the end of numbers: is 123 a whole value or
// the beginning of 12345e+6?).
type scanner struct {
	// The step is a func to be called to execute the next transition.
	// Also tried using an integer constant and a single func
	// with a switch, but using the func directly was 10% faster
	// on a 64-bit Mac Mini, and it's nicer to read.
	step func(*scanner, byte) int

	// Reached end of top-level value.
	endTop bool

	// Stack of what we're in the middle of - array values, object keys, object values.
	parseState []int

	// Error that happened, if any.
	err error

	// total bytes consumed, updated by decoder.Decode (and deliberately
	// not set to zero by scan.reset)
	bytes int64
}
84
5a8ea165
ILT
85var scannerPool = sync.Pool{
86 New: func() interface{} {
87 return &scanner{}
88 },
89}
90
91func newScanner() *scanner {
92 scan := scannerPool.Get().(*scanner)
93 // scan.reset by design doesn't set bytes to zero
94 scan.bytes = 0
95 scan.reset()
96 return scan
97}
98
99func freeScanner(scan *scanner) {
100 // Avoid hanging on to too much memory in extreme cases.
101 if len(scan.parseState) > 1024 {
102 scan.parseState = nil
103 }
104 scannerPool.Put(scan)
105}
106
// These values are returned by the state transition functions
// assigned to scanner.step and the method scanner.eof.
// They give details about the current state of the scan that
// callers might be interested to know about.
// It is okay to ignore the return value of any particular
// call to scanner.step: if one call returns scanError,
// every subsequent call will return scanError too.
const (
	// Continue.
	scanContinue     = iota // uninteresting byte
	scanBeginLiteral        // end implied by next result != scanContinue
	scanBeginObject         // begin object
	scanObjectKey           // just finished object key (string)
	scanObjectValue         // just finished non-last object value
	scanEndObject           // end object (implies scanObjectValue if possible)
	scanBeginArray          // begin array
	scanArrayValue          // just finished array value
	scanEndArray            // end array (implies scanArrayValue if possible)
	scanSkipSpace           // space byte; can skip; known to be last "continue" result

	// Stop.
	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
	scanError // hit an error, scanner.err.
)
131
// These values are stored in the parseState stack.
// They give the current state of a composite value
// being scanned. If the parser is inside a nested value
// the parseState describes the nested state, outermost at entry 0.
const (
	parseObjectKey   = iota // parsing object key (before colon)
	parseObjectValue        // parsing object value (after colon)
	parseArrayValue         // parsing array value
)
141
142// reset prepares the scanner for use.
143// It must be called before calling s.step.
144func (s *scanner) reset() {
145 s.step = stateBeginValue
146 s.parseState = s.parseState[0:0]
147 s.err = nil
ab61e9c4 148 s.endTop = false
7a938933
ILT
149}
150
151// eof tells the scanner that the end of input has been reached.
152// It returns a scan status just as s.step does.
153func (s *scanner) eof() int {
154 if s.err != nil {
155 return scanError
156 }
ab61e9c4 157 if s.endTop {
7a938933
ILT
158 return scanEnd
159 }
160 s.step(s, ' ')
ab61e9c4 161 if s.endTop {
7a938933
ILT
162 return scanEnd
163 }
164 if s.err == nil {
9ff56c95 165 s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
7a938933
ILT
166 }
167 return scanError
168}
169
170// pushParseState pushes a new parse state p onto the parse stack.
171func (s *scanner) pushParseState(p int) {
172 s.parseState = append(s.parseState, p)
173}
174
175// popParseState pops a parse state (already obtained) off the stack
176// and updates s.step accordingly.
177func (s *scanner) popParseState() {
178 n := len(s.parseState) - 1
179 s.parseState = s.parseState[0:n]
180 if n == 0 {
181 s.step = stateEndTop
ab61e9c4 182 s.endTop = true
7a938933
ILT
183 } else {
184 s.step = stateEndValue
185 }
186}
187
// isSpace reports whether c is one of the four bytes this scanner treats
// as insignificant whitespace: space, tab, carriage return, or newline.
func isSpace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
191
7a938933 192// stateBeginValueOrEmpty is the state after reading `[`.
f98dd1a3
ILT
193func stateBeginValueOrEmpty(s *scanner, c byte) int {
194 if c <= ' ' && isSpace(c) {
7a938933
ILT
195 return scanSkipSpace
196 }
197 if c == ']' {
198 return stateEndValue(s, c)
199 }
200 return stateBeginValue(s, c)
201}
202
203// stateBeginValue is the state at the beginning of the input.
f98dd1a3
ILT
204func stateBeginValue(s *scanner, c byte) int {
205 if c <= ' ' && isSpace(c) {
7a938933
ILT
206 return scanSkipSpace
207 }
208 switch c {
209 case '{':
210 s.step = stateBeginStringOrEmpty
211 s.pushParseState(parseObjectKey)
212 return scanBeginObject
213 case '[':
214 s.step = stateBeginValueOrEmpty
215 s.pushParseState(parseArrayValue)
216 return scanBeginArray
217 case '"':
218 s.step = stateInString
219 return scanBeginLiteral
220 case '-':
221 s.step = stateNeg
222 return scanBeginLiteral
223 case '0': // beginning of 0.123
224 s.step = state0
225 return scanBeginLiteral
226 case 't': // beginning of true
227 s.step = stateT
228 return scanBeginLiteral
229 case 'f': // beginning of false
230 s.step = stateF
231 return scanBeginLiteral
232 case 'n': // beginning of null
233 s.step = stateN
234 return scanBeginLiteral
235 }
236 if '1' <= c && c <= '9' { // beginning of 1234.5
237 s.step = state1
238 return scanBeginLiteral
239 }
240 return s.error(c, "looking for beginning of value")
241}
242
243// stateBeginStringOrEmpty is the state after reading `{`.
f98dd1a3
ILT
244func stateBeginStringOrEmpty(s *scanner, c byte) int {
245 if c <= ' ' && isSpace(c) {
7a938933
ILT
246 return scanSkipSpace
247 }
248 if c == '}' {
249 n := len(s.parseState)
250 s.parseState[n-1] = parseObjectValue
251 return stateEndValue(s, c)
252 }
253 return stateBeginString(s, c)
254}
255
256// stateBeginString is the state after reading `{"key": value,`.
f98dd1a3
ILT
257func stateBeginString(s *scanner, c byte) int {
258 if c <= ' ' && isSpace(c) {
7a938933
ILT
259 return scanSkipSpace
260 }
261 if c == '"' {
262 s.step = stateInString
263 return scanBeginLiteral
264 }
265 return s.error(c, "looking for beginning of object key string")
266}
267
268// stateEndValue is the state after completing a value,
269// such as after reading `{}` or `true` or `["x"`.
f98dd1a3 270func stateEndValue(s *scanner, c byte) int {
7a938933
ILT
271 n := len(s.parseState)
272 if n == 0 {
273 // Completed top-level before the current byte.
274 s.step = stateEndTop
ab61e9c4 275 s.endTop = true
7a938933
ILT
276 return stateEndTop(s, c)
277 }
f98dd1a3 278 if c <= ' ' && isSpace(c) {
7a938933
ILT
279 s.step = stateEndValue
280 return scanSkipSpace
281 }
282 ps := s.parseState[n-1]
283 switch ps {
284 case parseObjectKey:
285 if c == ':' {
286 s.parseState[n-1] = parseObjectValue
287 s.step = stateBeginValue
288 return scanObjectKey
289 }
290 return s.error(c, "after object key")
291 case parseObjectValue:
292 if c == ',' {
293 s.parseState[n-1] = parseObjectKey
294 s.step = stateBeginString
295 return scanObjectValue
296 }
297 if c == '}' {
298 s.popParseState()
299 return scanEndObject
300 }
301 return s.error(c, "after object key:value pair")
302 case parseArrayValue:
303 if c == ',' {
304 s.step = stateBeginValue
305 return scanArrayValue
306 }
307 if c == ']' {
308 s.popParseState()
309 return scanEndArray
310 }
311 return s.error(c, "after array element")
312 }
313 return s.error(c, "")
314}
315
316// stateEndTop is the state after finishing the top-level value,
317// such as after reading `{}` or `[1,2,3]`.
318// Only space characters should be seen now.
f98dd1a3 319func stateEndTop(s *scanner, c byte) int {
4f4a855d 320 if !isSpace(c) {
7a938933
ILT
321 // Complain about non-space byte on next call.
322 s.error(c, "after top-level value")
323 }
324 return scanEnd
325}
326
327// stateInString is the state after reading `"`.
f98dd1a3 328func stateInString(s *scanner, c byte) int {
7a938933
ILT
329 if c == '"' {
330 s.step = stateEndValue
331 return scanContinue
332 }
333 if c == '\\' {
334 s.step = stateInStringEsc
335 return scanContinue
336 }
337 if c < 0x20 {
338 return s.error(c, "in string literal")
339 }
340 return scanContinue
341}
342
343// stateInStringEsc is the state after reading `"\` during a quoted string.
f98dd1a3 344func stateInStringEsc(s *scanner, c byte) int {
7a938933
ILT
345 switch c {
346 case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
347 s.step = stateInString
348 return scanContinue
f98dd1a3 349 case 'u':
7a938933
ILT
350 s.step = stateInStringEscU
351 return scanContinue
352 }
353 return s.error(c, "in string escape code")
354}
355
356// stateInStringEscU is the state after reading `"\u` during a quoted string.
f98dd1a3 357func stateInStringEscU(s *scanner, c byte) int {
7a938933
ILT
358 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
359 s.step = stateInStringEscU1
360 return scanContinue
361 }
362 // numbers
363 return s.error(c, "in \\u hexadecimal character escape")
364}
365
366// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
f98dd1a3 367func stateInStringEscU1(s *scanner, c byte) int {
7a938933
ILT
368 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
369 s.step = stateInStringEscU12
370 return scanContinue
371 }
372 // numbers
373 return s.error(c, "in \\u hexadecimal character escape")
374}
375
376// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
f98dd1a3 377func stateInStringEscU12(s *scanner, c byte) int {
7a938933
ILT
378 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
379 s.step = stateInStringEscU123
380 return scanContinue
381 }
382 // numbers
383 return s.error(c, "in \\u hexadecimal character escape")
384}
385
386// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
f98dd1a3 387func stateInStringEscU123(s *scanner, c byte) int {
7a938933
ILT
388 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
389 s.step = stateInString
390 return scanContinue
391 }
392 // numbers
393 return s.error(c, "in \\u hexadecimal character escape")
394}
395
f038dae6 396// stateNeg is the state after reading `-` during a number.
f98dd1a3 397func stateNeg(s *scanner, c byte) int {
7a938933
ILT
398 if c == '0' {
399 s.step = state0
400 return scanContinue
401 }
402 if '1' <= c && c <= '9' {
403 s.step = state1
404 return scanContinue
405 }
406 return s.error(c, "in numeric literal")
407}
408
409// state1 is the state after reading a non-zero integer during a number,
410// such as after reading `1` or `100` but not `0`.
f98dd1a3 411func state1(s *scanner, c byte) int {
7a938933
ILT
412 if '0' <= c && c <= '9' {
413 s.step = state1
414 return scanContinue
415 }
416 return state0(s, c)
417}
418
419// state0 is the state after reading `0` during a number.
f98dd1a3 420func state0(s *scanner, c byte) int {
7a938933
ILT
421 if c == '.' {
422 s.step = stateDot
423 return scanContinue
424 }
5133f00e 425 if c == 'e' || c == 'E' {
7a938933
ILT
426 s.step = stateE
427 return scanContinue
428 }
429 return stateEndValue(s, c)
430}
431
432// stateDot is the state after reading the integer and decimal point in a number,
433// such as after reading `1.`.
f98dd1a3 434func stateDot(s *scanner, c byte) int {
7a938933
ILT
435 if '0' <= c && c <= '9' {
436 s.step = stateDot0
437 return scanContinue
438 }
439 return s.error(c, "after decimal point in numeric literal")
440}
441
442// stateDot0 is the state after reading the integer, decimal point, and subsequent
443// digits of a number, such as after reading `3.14`.
f98dd1a3 444func stateDot0(s *scanner, c byte) int {
7a938933 445 if '0' <= c && c <= '9' {
7a938933
ILT
446 return scanContinue
447 }
5133f00e 448 if c == 'e' || c == 'E' {
7a938933
ILT
449 s.step = stateE
450 return scanContinue
451 }
452 return stateEndValue(s, c)
453}
454
455// stateE is the state after reading the mantissa and e in a number,
456// such as after reading `314e` or `0.314e`.
f98dd1a3
ILT
457func stateE(s *scanner, c byte) int {
458 if c == '+' || c == '-' {
7a938933
ILT
459 s.step = stateESign
460 return scanContinue
461 }
462 return stateESign(s, c)
463}
464
465// stateESign is the state after reading the mantissa, e, and sign in a number,
466// such as after reading `314e-` or `0.314e+`.
f98dd1a3 467func stateESign(s *scanner, c byte) int {
7a938933
ILT
468 if '0' <= c && c <= '9' {
469 s.step = stateE0
470 return scanContinue
471 }
472 return s.error(c, "in exponent of numeric literal")
473}
474
475// stateE0 is the state after reading the mantissa, e, optional sign,
476// and at least one digit of the exponent in a number,
477// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
f98dd1a3 478func stateE0(s *scanner, c byte) int {
7a938933 479 if '0' <= c && c <= '9' {
7a938933
ILT
480 return scanContinue
481 }
482 return stateEndValue(s, c)
483}
484
485// stateT is the state after reading `t`.
f98dd1a3 486func stateT(s *scanner, c byte) int {
7a938933
ILT
487 if c == 'r' {
488 s.step = stateTr
489 return scanContinue
490 }
491 return s.error(c, "in literal true (expecting 'r')")
492}
493
494// stateTr is the state after reading `tr`.
f98dd1a3 495func stateTr(s *scanner, c byte) int {
7a938933
ILT
496 if c == 'u' {
497 s.step = stateTru
498 return scanContinue
499 }
500 return s.error(c, "in literal true (expecting 'u')")
501}
502
503// stateTru is the state after reading `tru`.
f98dd1a3 504func stateTru(s *scanner, c byte) int {
7a938933
ILT
505 if c == 'e' {
506 s.step = stateEndValue
507 return scanContinue
508 }
509 return s.error(c, "in literal true (expecting 'e')")
510}
511
512// stateF is the state after reading `f`.
f98dd1a3 513func stateF(s *scanner, c byte) int {
7a938933
ILT
514 if c == 'a' {
515 s.step = stateFa
516 return scanContinue
517 }
518 return s.error(c, "in literal false (expecting 'a')")
519}
520
521// stateFa is the state after reading `fa`.
f98dd1a3 522func stateFa(s *scanner, c byte) int {
7a938933
ILT
523 if c == 'l' {
524 s.step = stateFal
525 return scanContinue
526 }
527 return s.error(c, "in literal false (expecting 'l')")
528}
529
530// stateFal is the state after reading `fal`.
f98dd1a3 531func stateFal(s *scanner, c byte) int {
7a938933
ILT
532 if c == 's' {
533 s.step = stateFals
534 return scanContinue
535 }
536 return s.error(c, "in literal false (expecting 's')")
537}
538
539// stateFals is the state after reading `fals`.
f98dd1a3 540func stateFals(s *scanner, c byte) int {
7a938933
ILT
541 if c == 'e' {
542 s.step = stateEndValue
543 return scanContinue
544 }
545 return s.error(c, "in literal false (expecting 'e')")
546}
547
548// stateN is the state after reading `n`.
f98dd1a3 549func stateN(s *scanner, c byte) int {
7a938933
ILT
550 if c == 'u' {
551 s.step = stateNu
552 return scanContinue
553 }
554 return s.error(c, "in literal null (expecting 'u')")
555}
556
557// stateNu is the state after reading `nu`.
f98dd1a3 558func stateNu(s *scanner, c byte) int {
7a938933
ILT
559 if c == 'l' {
560 s.step = stateNul
561 return scanContinue
562 }
563 return s.error(c, "in literal null (expecting 'l')")
564}
565
566// stateNul is the state after reading `nul`.
f98dd1a3 567func stateNul(s *scanner, c byte) int {
7a938933
ILT
568 if c == 'l' {
569 s.step = stateEndValue
570 return scanContinue
571 }
572 return s.error(c, "in literal null (expecting 'l')")
573}
574
575// stateError is the state after reaching a syntax error,
576// such as after reading `[1}` or `5.1.2`.
f98dd1a3 577func stateError(s *scanner, c byte) int {
7a938933
ILT
578 return scanError
579}
580
581// error records an error and switches to the error state.
f98dd1a3 582func (s *scanner) error(c byte, context string) int {
7a938933 583 s.step = stateError
9ff56c95 584 s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
7a938933
ILT
585 return scanError
586}
587
// quoteChar formats c as a quoted character literal.
// The result is wrapped in single quotes; c is escaped the way
// strconv.Quote would escape it inside a string, except for the two
// quote characters handled below.
func quoteChar(c byte) string {
	// special cases - different from quoted strings
	if c == '\'' {
		return `'\''`
	}
	if c == '"' {
		return `'"'`
	}

	// use quoted string with different quotation marks
	s := strconv.Quote(string(rune(c)))
	return "'" + s[1:len(s)-1] + "'"
}