]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgo/go/ebnf/ebnf.go
Add Go frontend, libgo library, and Go testsuite.
[thirdparty/gcc.git] / libgo / go / ebnf / ebnf.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // A library for EBNF grammars. The input is text ([]byte) satisfying
6 // the following grammar (represented itself in EBNF):
7 //
8 // Production = name "=" Expression "." .
9 // Expression = Alternative { "|" Alternative } .
10 // Alternative = Term { Term } .
11 // Term = name | token [ "..." token ] | Group | Option | Repetition .
12 // Group = "(" Expression ")" .
13 // Option = "[" Expression "]" .
14 // Repetition = "{" Expression "}" .
15 //
16 // A name is a Go identifier, a token is a Go string, and comments
17 // and white space follow the same rules as for the Go language.
18 // Production names starting with an uppercase Unicode letter denote
19 // non-terminal productions (i.e., productions which allow white-space
20 // and comments between tokens); all other production names denote
21 // lexical productions.
22 //
23 package ebnf
24
25 import (
26 "go/scanner"
27 "go/token"
28 "os"
29 "unicode"
30 "utf8"
31 )
32
33
34 // ----------------------------------------------------------------------------
35 // Internal representation
36
37 type (
38 // An Expression node represents a production expression.
39 Expression interface {
40 // Pos is the position of the first character of the syntactic construct
41 Pos() token.Position
42 }
43
44 // An Alternative node represents a non-empty list of alternative expressions.
45 Alternative []Expression // x | y | z
46
47 // A Sequence node represents a non-empty list of sequential expressions.
48 Sequence []Expression // x y z
49
50 // A Name node represents a production name.
51 Name struct {
52 token.Position
53 String string
54 }
55
56 // A Token node represents a literal.
57 Token struct {
58 token.Position
59 String string
60 }
61
62 // A List node represents a range of characters.
63 Range struct {
64 Begin, End *Token // begin ... end
65 }
66
67 // A Group node represents a grouped expression.
68 Group struct {
69 token.Position
70 Body Expression // (body)
71 }
72
73 // An Option node represents an optional expression.
74 Option struct {
75 token.Position
76 Body Expression // [body]
77 }
78
79 // A Repetition node represents a repeated expression.
80 Repetition struct {
81 token.Position
82 Body Expression // {body}
83 }
84
85 // A Production node represents an EBNF production.
86 Production struct {
87 Name *Name
88 Expr Expression
89 }
90
91 // A Grammar is a set of EBNF productions. The map
92 // is indexed by production name.
93 //
94 Grammar map[string]*Production
95 )
96
97
98 func (x Alternative) Pos() token.Position {
99 return x[0].Pos() // the parser always generates non-empty Alternative
100 }
101
102
103 func (x Sequence) Pos() token.Position {
104 return x[0].Pos() // the parser always generates non-empty Sequences
105 }
106
107
108 func (x Range) Pos() token.Position { return x.Begin.Pos() }
109
110
111 func (p *Production) Pos() token.Position { return p.Name.Pos() }
112
113
114 // ----------------------------------------------------------------------------
115 // Grammar verification
116
117 func isLexical(name string) bool {
118 ch, _ := utf8.DecodeRuneInString(name)
119 return !unicode.IsUpper(ch)
120 }
121
122
123 type verifier struct {
124 scanner.ErrorVector
125 worklist []*Production
126 reached Grammar // set of productions reached from (and including) the root production
127 grammar Grammar
128 }
129
130
131 func (v *verifier) push(prod *Production) {
132 name := prod.Name.String
133 if _, found := v.reached[name]; !found {
134 v.worklist = append(v.worklist, prod)
135 v.reached[name] = prod
136 }
137 }
138
139
140 func (v *verifier) verifyChar(x *Token) int {
141 s := x.String
142 if utf8.RuneCountInString(s) != 1 {
143 v.Error(x.Pos(), "single char expected, found "+s)
144 return 0
145 }
146 ch, _ := utf8.DecodeRuneInString(s)
147 return ch
148 }
149
150
151 func (v *verifier) verifyExpr(expr Expression, lexical bool) {
152 switch x := expr.(type) {
153 case nil:
154 // empty expression
155 case Alternative:
156 for _, e := range x {
157 v.verifyExpr(e, lexical)
158 }
159 case Sequence:
160 for _, e := range x {
161 v.verifyExpr(e, lexical)
162 }
163 case *Name:
164 // a production with this name must exist;
165 // add it to the worklist if not yet processed
166 if prod, found := v.grammar[x.String]; found {
167 v.push(prod)
168 } else {
169 v.Error(x.Pos(), "missing production "+x.String)
170 }
171 // within a lexical production references
172 // to non-lexical productions are invalid
173 if lexical && !isLexical(x.String) {
174 v.Error(x.Pos(), "reference to non-lexical production "+x.String)
175 }
176 case *Token:
177 // nothing to do for now
178 case *Range:
179 i := v.verifyChar(x.Begin)
180 j := v.verifyChar(x.End)
181 if i >= j {
182 v.Error(x.Pos(), "decreasing character range")
183 }
184 case *Group:
185 v.verifyExpr(x.Body, lexical)
186 case *Option:
187 v.verifyExpr(x.Body, lexical)
188 case *Repetition:
189 v.verifyExpr(x.Body, lexical)
190 default:
191 panic("unreachable")
192 }
193 }
194
195
196 func (v *verifier) verify(grammar Grammar, start string) {
197 // find root production
198 root, found := grammar[start]
199 if !found {
200 var noPos token.Position
201 v.Error(noPos, "no start production "+start)
202 return
203 }
204
205 // initialize verifier
206 v.ErrorVector.Reset()
207 v.worklist = v.worklist[0:0]
208 v.reached = make(Grammar)
209 v.grammar = grammar
210
211 // work through the worklist
212 v.push(root)
213 for {
214 n := len(v.worklist) - 1
215 if n < 0 {
216 break
217 }
218 prod := v.worklist[n]
219 v.worklist = v.worklist[0:n]
220 v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
221 }
222
223 // check if all productions were reached
224 if len(v.reached) < len(v.grammar) {
225 for name, prod := range v.grammar {
226 if _, found := v.reached[name]; !found {
227 v.Error(prod.Pos(), name+" is unreachable")
228 }
229 }
230 }
231 }
232
233
234 // Verify checks that:
235 // - all productions used are defined
236 // - all productions defined are used when beginning at start
237 // - lexical productions refer only to other lexical productions
238 //
239 func Verify(grammar Grammar, start string) os.Error {
240 var v verifier
241 v.verify(grammar, start)
242 return v.GetError(scanner.Sorted)
243 }