1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // A library for EBNF grammars. The input is text ([]byte) satisfying
6 // the following grammar (represented itself in EBNF):
8 // Production = name "=" Expression "." .
9 // Expression = Alternative { "|" Alternative } .
10 // Alternative = Term { Term } .
11 // Term = name | token [ "..." token ] | Group | Option | Repetition .
12 // Group = "(" Expression ")" .
13 // Option = "[" Expression "]" .
14 // Repetition = "{" Expression "}" .
16 // A name is a Go identifier, a token is a Go string, and comments
17 // and white space follow the same rules as for the Go language.
18 // Production names starting with an uppercase Unicode letter denote
19 // non-terminal productions (i.e., productions which allow white-space
20 // and comments between tokens); all other production names denote
21 // lexical productions.
34 // ----------------------------------------------------------------------------
35 // Internal representation
38 // An Expression node represents a production expression.
39 Expression interface {
40 // Pos is the position of the first character of the syntactic construct
44 // An Alternative node represents a non-empty list of alternative expressions.
45 Alternative []Expression // x | y | z
47 // A Sequence node represents a non-empty list of sequential expressions.
48 Sequence []Expression // x y z
50 // A Name node represents a production name.
56 // A Token node represents a literal.
62 // A List node represents a range of characters.
64 Begin, End *Token // begin ... end
67 // A Group node represents a grouped expression.
70 Body Expression // (body)
73 // An Option node represents an optional expression.
76 Body Expression // [body]
79 // A Repetition node represents a repeated expression.
82 Body Expression // {body}
85 // A Production node represents an EBNF production.
91 // A Grammar is a set of EBNF productions. The map
92 // is indexed by production name.
94 Grammar map[string]*Production
98 func (x Alternative) Pos() token.Position {
99 return x[0].Pos() // the parser always generates non-empty Alternative
103 func (x Sequence) Pos() token.Position {
104 return x[0].Pos() // the parser always generates non-empty Sequences
108 func (x Range) Pos() token.Position { return x.Begin.Pos() }
111 func (p *Production) Pos() token.Position { return p.Name.Pos() }
114 // ----------------------------------------------------------------------------
115 // Grammar verification
117 func isLexical(name string) bool {
118 ch, _ := utf8.DecodeRuneInString(name)
119 return !unicode.IsUpper(ch)
123 type verifier struct {
125 worklist []*Production
126 reached Grammar // set of productions reached from (and including) the root production
131 func (v *verifier) push(prod *Production) {
132 name := prod.Name.String
133 if _, found := v.reached[name]; !found {
134 v.worklist = append(v.worklist, prod)
135 v.reached[name] = prod
140 func (v *verifier) verifyChar(x *Token) int {
142 if utf8.RuneCountInString(s) != 1 {
143 v.Error(x.Pos(), "single char expected, found "+s)
146 ch, _ := utf8.DecodeRuneInString(s)
151 func (v *verifier) verifyExpr(expr Expression, lexical bool) {
152 switch x := expr.(type) {
156 for _, e := range x {
157 v.verifyExpr(e, lexical)
160 for _, e := range x {
161 v.verifyExpr(e, lexical)
164 // a production with this name must exist;
165 // add it to the worklist if not yet processed
166 if prod, found := v.grammar[x.String]; found {
169 v.Error(x.Pos(), "missing production "+x.String)
171 // within a lexical production references
172 // to non-lexical productions are invalid
173 if lexical && !isLexical(x.String) {
174 v.Error(x.Pos(), "reference to non-lexical production "+x.String)
177 // nothing to do for now
179 i := v.verifyChar(x.Begin)
180 j := v.verifyChar(x.End)
182 v.Error(x.Pos(), "decreasing character range")
185 v.verifyExpr(x.Body, lexical)
187 v.verifyExpr(x.Body, lexical)
189 v.verifyExpr(x.Body, lexical)
196 func (v *verifier) verify(grammar Grammar, start string) {
197 // find root production
198 root, found := grammar[start]
200 var noPos token.Position
201 v.Error(noPos, "no start production "+start)
205 // initialize verifier
206 v.ErrorVector.Reset()
207 v.worklist = v.worklist[0:0]
208 v.reached = make(Grammar)
211 // work through the worklist
214 n := len(v.worklist) - 1
218 prod := v.worklist[n]
219 v.worklist = v.worklist[0:n]
220 v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
223 // check if all productions were reached
224 if len(v.reached) < len(v.grammar) {
225 for name, prod := range v.grammar {
226 if _, found := v.reached[name]; !found {
227 v.Error(prod.Pos(), name+" is unreachable")
234 // Verify checks that:
235 // - all productions used are defined
236 // - all productions defined are used when beginning at start
237 // - lexical productions refer only to other lexical productions
239 func Verify(grammar Grammar, start string) os.Error {
241 v.verify(grammar, start)
242 return v.GetError(scanner.Sorted)