1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
6 // This is a simplified copy of Google's buildifier parser.
// A Position describes an arbitrary source position in a file, including the
// line, column (counted in runes), and byte offset.
type Position struct {
	Line     int // line in input (starting at 1)
	LineRune int // rune in line (starting at 1)
	Byte     int // byte in input (starting at 0)
}

// add returns the position at the end of s, assuming it starts at p.
// The receiver is a value, so p itself is left unchanged.
func (p Position) add(s string) Position {
	p.Byte += len(s)
	if n := strings.Count(s, "\n"); n > 0 {
		p.Line += n
		// Only the text after the final newline contributes to the
		// column, which restarts at 1 on the new line.
		s = s[strings.LastIndex(s, "\n")+1:]
		p.LineRune = 1
	}
	// Columns are counted in runes, not bytes.
	p.LineRune += utf8.RuneCountInString(s)
	return p
}
40 // An Expr represents an input element.
42 // Span returns the start and end position of the expression,
43 // excluding leading or trailing comments.
44 Span() (start, end Position)
46 // Comment returns the comments attached to the expression.
47 // This method would normally be named 'Comments' but that
48 // would interfere with embedding a type of the same name.
52 // A Comment represents a single // comment.
55 Token string // without trailing newline
56 Suffix bool // an end of line (not whole line) comment
59 // Comments collects the comments associated with an expression.
60 type Comments struct {
61 Before []Comment // whole-line comments before this expression
62 Suffix []Comment // end-of-line comments after this expression
64 // For top-level expressions only, After lists whole-line
65 // comments following the expression.
69 // Comment returns the receiver. This isn't useful by itself, but
70 // a Comments struct is embedded into all the expression
71 // implementation types, and this gives each of those a Comment
72 // method to satisfy the Expr interface.
73 func (c *Comments) Comment() *Comments {
77 // A FileSyntax represents an entire go.mod file.
78 type FileSyntax struct {
79 Name string // file path
84 func (x *FileSyntax) Span() (start, end Position) {
88 start, _ = x.Stmt[0].Span()
89 _, end = x.Stmt[len(x.Stmt)-1].Span()
93 // addLine adds a line containing the given tokens to the file.
95 // If the first token of the hint matches the first token of the
96 // line, the new line is added at the end of the block containing hint,
97 // extracting hint into a new block if it is not yet in one.
99 // If the hint is non-nil but its first token does not match,
100 // the new line is added after the block containing hint
101 // (or hint itself, if not in a block).
103 // If no hint is provided, addLine appends the line to the end of
104 // the last block with a matching first token,
105 // or to the end of the file if no such block exists.
106 func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
108 // If no hint given, add to the last statement of the given type.
110 for i := len(x.Stmt) - 1; i >= 0; i-- {
112 switch stmt := stmt.(type) {
114 if stmt.Token != nil && stmt.Token[0] == tokens[0] {
119 if stmt.Token[0] == tokens[0] {
127 newLineAfter := func(i int) *Line {
128 new := &Line{Token: tokens}
129 if i == len(x.Stmt) {
130 x.Stmt = append(x.Stmt, new)
132 x.Stmt = append(x.Stmt, nil)
133 copy(x.Stmt[i+2:], x.Stmt[i+1:])
140 for i, stmt := range x.Stmt {
141 switch stmt := stmt.(type) {
144 if stmt.Token == nil || stmt.Token[0] != tokens[0] {
145 return newLineAfter(i)
148 // Convert line to line block.
150 block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
151 stmt.Token = stmt.Token[1:]
153 new := &Line{Token: tokens[1:], InBlock: true}
154 block.Line = append(block.Line, new)
160 if stmt.Token[0] != tokens[0] {
161 return newLineAfter(i)
164 new := &Line{Token: tokens[1:], InBlock: true}
165 stmt.Line = append(stmt.Line, new)
169 for j, line := range stmt.Line {
171 if stmt.Token[0] != tokens[0] {
172 return newLineAfter(i)
175 // Add new line after hint within the block.
176 stmt.Line = append(stmt.Line, nil)
177 copy(stmt.Line[j+2:], stmt.Line[j+1:])
178 new := &Line{Token: tokens[1:], InBlock: true}
187 new := &Line{Token: tokens}
188 x.Stmt = append(x.Stmt, new)
192 func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
199 func (x *FileSyntax) removeLine(line *Line) {
203 // Cleanup cleans up the file syntax x after any edit operations.
204 // To avoid quadratic behavior, removeLine marks the line as dead
205 // by setting line.Token = nil but does not remove it from the slice
206 // in which it appears. After edits have all been indicated,
207 // calling Cleanup cleans out the dead lines.
208 func (x *FileSyntax) Cleanup() {
210 for _, stmt := range x.Stmt {
211 switch stmt := stmt.(type) {
213 if stmt.Token == nil {
218 for _, line := range stmt.Line {
219 if line.Token != nil {
228 // Collapse block into single line.
231 Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
232 Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
233 After: commentsAdd(stmt.Line[0].After, stmt.After),
235 Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
241 stmt.Line = stmt.Line[:ww]
249 func commentsAdd(x, y []Comment) []Comment {
250 return append(x[:len(x):len(x)], y...)
// stringsAdd returns the concatenation of x and y.
// The full slice expression caps x at its own length, so when y is
// non-empty append has to allocate a new backing array instead of writing
// into spare capacity that other slices sharing x's array may be using.
func stringsAdd(x, y []string) []string {
	return append(x[:len(x):len(x)], y...)
}
257 // A CommentBlock represents a top-level block of comments separate
259 type CommentBlock struct {
264 func (x *CommentBlock) Span() (start, end Position) {
265 return x.Start, x.Start
268 // A Line is a single line of tokens.
277 func (x *Line) Span() (start, end Position) {
278 return x.Start, x.End
281 // A LineBlock is a factored block of lines, like
288 type LineBlock struct {
297 func (x *LineBlock) Span() (start, end Position) {
298 return x.Start, x.RParen.Pos.add(")")
301 // An LParen represents the beginning of a parenthesized line block.
302 // It is a place to store suffix comments.
308 func (x *LParen) Span() (start, end Position) {
309 return x.Pos, x.Pos.add(")")
312 // An RParen represents the end of a parenthesized line block.
313 // It is a place to store whole-line (before) comments.
319 func (x *RParen) Span() (start, end Position) {
320 return x.Pos, x.Pos.add(")")
323 // An input represents a single input file being parsed.
326 filename string // name of input file, for errors
327 complete []byte // entire input
328 remaining []byte // remaining input
329 token []byte // token being scanned
330 lastToken string // most recently returned token, for error messages
331 pos Position // current input position
332 comments []Comment // accumulated comments
333 endRule int // position of end of current rule
336 file *FileSyntax // returned top-level syntax tree
337 parseError error // error encountered during parsing
339 // Comment assignment state.
340 pre []Expr // all expressions, in preorder traversal
341 post []Expr // all expressions, in postorder traversal
344 func newInput(filename string, data []byte) *input {
349 pos: Position{Line: 1, LineRune: 1, Byte: 0},
353 // parse parses the input file.
354 func parse(file string, data []byte) (f *FileSyntax, err error) {
355 in := newInput(file, data)
356 // The parser panics for both routine errors like syntax errors
357 // and for programmer bugs like array index errors.
358 // Turn both into error returns. Catching bug panics is
359 // especially important when processing many files.
361 if e := recover(); e != nil {
362 if e == in.parseError {
365 err = fmt.Errorf("%s:%d:%d: internal error: %v", in.filename, in.pos.Line, in.pos.LineRune, e)
370 // Invoke the parser.
372 if in.parseError != nil {
373 return nil, in.parseError
375 in.file.Name = in.filename
377 // Assign comments to nearby syntax.
383 // Error is called to report an error.
384 // The reason s is often "syntax error".
385 // Error does not return: it panics.
386 func (in *input) Error(s string) {
387 if s == "syntax error" && in.lastToken != "" {
388 s += " near " + in.lastToken
390 in.parseError = fmt.Errorf("%s:%d:%d: %v", in.filename, in.pos.Line, in.pos.LineRune, s)
394 // eof reports whether the input has reached end of file.
395 func (in *input) eof() bool {
396 return len(in.remaining) == 0
399 // peekRune returns the next rune in the input without consuming it.
400 func (in *input) peekRune() int {
401 if len(in.remaining) == 0 {
404 r, _ := utf8.DecodeRune(in.remaining)
408 // peekPrefix reports whether the remaining input begins with the given prefix.
409 func (in *input) peekPrefix(prefix string) bool {
410 // This is like bytes.HasPrefix(in.remaining, []byte(prefix))
411 // but without the allocation of the []byte copy of prefix.
412 for i := 0; i < len(prefix); i++ {
413 if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
420 // readRune consumes and returns the next rune in the input.
421 func (in *input) readRune() int {
422 if len(in.remaining) == 0 {
423 in.Error("internal lexer error: readRune at EOF")
425 r, size := utf8.DecodeRune(in.remaining)
426 in.remaining = in.remaining[size:]
437 type symType struct {
443 // startToken marks the beginning of the next input token.
444 // It must be followed by a call to endToken, once the token has
445 // been consumed using readRune.
446 func (in *input) startToken(sym *symType) {
447 in.token = in.remaining
452 // endToken marks the end of an input token.
453 // It records the actual token string in sym.text if the caller
454 // has not done that already.
455 func (in *input) endToken(sym *symType) {
457 tok := string(in.token[:len(in.token)-len(in.remaining)])
459 in.lastToken = sym.text
464 // lex is called from the parser to obtain the next input token.
465 // It returns the token value (either a rune like '+' or a symbolic token like _EOF)
466 // and sets sym to the data associated with the token.
467 // For all our input tokens, the associated data is
468 // sym.pos (the position where the token begins)
469 // and sym.text (the input string corresponding to the token).
470 func (in *input) lex(sym *symType) int {
471 // Skip past spaces, stopping at non-space or EOF.
472 countNL := 0 // number of newlines we've skipped past
474 // Skip over spaces. Count newlines so we can give the parser
475 // information about where top-level blank lines are,
476 // for top-level comment assignment.
478 if c == ' ' || c == '\t' || c == '\r' {
483 // Comment runs to end of line.
484 if in.peekPrefix("//") {
487 // Is this comment the only thing on its line?
488 // Find the last \n before this // and see if it's all
489 // spaces from there to here.
490 i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
491 suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
496 for len(in.remaining) > 0 && in.readRune() != '\n' {
500 sym.text = strings.TrimRight(sym.text, "\n")
501 in.lastToken = "comment"
503 // If we are at top level (not in a statement), hand the comment to
504 // the parser as a _COMMENT token. The grammar is written
505 // to handle top-level comments itself.
507 // Not in a statement. Tell parser about top-level comment.
511 // Otherwise, save comment for later attachment to syntax tree.
513 in.comments = append(in.comments, Comment{sym.pos, "", false})
515 in.comments = append(in.comments, Comment{sym.pos, sym.text, suffix})
520 if in.peekPrefix("/*") {
521 in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
524 // Found non-space non-comment.
528 // Found the beginning of the next token.
530 defer in.endToken(sym)
538 // Punctuation tokens.
539 switch c := in.peekRune(); c {
552 case '"', '`': // quoted string
558 in.Error("unexpected EOF in string")
560 if in.peekRune() == '\n' {
561 in.Error("unexpected newline in string")
567 if c == '\\' && quote != '`' {
570 in.Error("unexpected EOF in string")
579 // Checked all punctuation. Must be identifier token.
580 if c := in.peekRune(); !isIdent(c) {
581 in.Error(fmt.Sprintf("unexpected input character %#q", c))
584 // Scan over identifier.
585 for isIdent(in.peekRune()) {
586 if in.peekPrefix("//") {
589 if in.peekPrefix("/*") {
590 in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
// isIdent reports whether c is an identifier rune.
// We treat nearly all runes as identifier runes: everything except 0
// and Unicode white space.
func isIdent(c int) bool {
	return c != 0 && !unicode.IsSpace(rune(c))
}
603 // Comment assignment.
604 // We build two lists of all subexpressions, preorder and postorder.
605 // The preorder list is ordered by start location, with outer expressions first.
606 // The postorder list is ordered by end location, with outer expressions last.
607 // We use the preorder list to assign each whole-line comment to the syntax
608 // immediately following it, and we use the postorder list to assign each
609 // end-of-line comment to the syntax immediately preceding it.
611 // order walks the expression adding it and its subexpressions to the
612 // preorder and postorder lists.
613 func (in *input) order(x Expr) {
615 in.pre = append(in.pre, x)
617 switch x := x.(type) {
619 panic(fmt.Errorf("order: unexpected type %T", x))
622 case *LParen, *RParen:
629 for _, stmt := range x.Stmt {
634 for _, l := range x.Line {
640 in.post = append(in.post, x)
644 // assignComments attaches comments to nearby syntax.
645 func (in *input) assignComments() {
648 // Generate preorder and postorder lists.
651 // Split into whole-line comments and suffix comments.
652 var line, suffix []Comment
653 for _, com := range in.comments {
655 suffix = append(suffix, com)
657 line = append(line, com)
662 for _, c := range line {
663 fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
667 // Assign line comments to syntax immediately following.
668 for _, x := range in.pre {
671 fmt.Printf("pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
674 for len(line) > 0 && start.Byte >= line[0].Start.Byte {
676 fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
678 xcom.Before = append(xcom.Before, line[0])
683 // Remaining line comments go at end of file.
684 in.file.After = append(in.file.After, line...)
687 for _, c := range suffix {
688 fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
692 // Assign suffix comments to syntax immediately before.
693 for i := len(in.post) - 1; i >= 0; i-- {
696 start, end := x.Span()
698 fmt.Printf("post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
701 // Do not assign suffix comments to end of line block or whole file.
702 // Instead assign them to the last element inside.
708 // Do not assign suffix comments to something that starts
709 // on an earlier line, so that in
714 // we assign the comment to z and not to x ( ... ).
715 if start.Line != end.Line {
719 for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
721 fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
723 xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
724 suffix = suffix[:len(suffix)-1]
728 // We assigned suffix comments in reverse.
729 // If multiple suffix comments were appended to the same
730 // expression node, they are now in reverse. Fix that.
731 for _, x := range in.post {
732 reverseComments(x.Comment().Suffix)
735 // Remaining suffix comments go at beginning of file.
736 in.file.Before = append(in.file.Before, suffix...)
739 // reverseComments reverses the []Comment list.
740 func reverseComments(list []Comment) {
741 for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
742 list[i], list[j] = list[j], list[i]
746 func (in *input) parseFile() {
747 in.file = new(FileSyntax)
755 in.file.Stmt = append(in.file.Stmt, cb)
760 cb = &CommentBlock{Start: sym.pos}
763 com.Before = append(com.Before, Comment{Start: sym.pos, Token: sym.text})
766 in.file.Stmt = append(in.file.Stmt, cb)
772 in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
779 func (in *input) parseStmt(sym *symType) {
782 token := []string{sym.text}
786 case '\n', _EOF, _EOL:
787 in.file.Stmt = append(in.file.Stmt, &Line{
794 in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, token, sym))
797 token = append(token, sym.text)
803 func (in *input) parseLineBlock(start Position, token []string, sym *symType) *LineBlock {
807 LParen: LParen{Pos: sym.pos},
809 var comments []Comment
816 if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
817 comments = append(comments, Comment{})
820 comments = append(comments, Comment{Start: sym.pos, Token: sym.text})
822 in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
824 x.RParen.Before = comments
825 x.RParen.Pos = sym.pos
827 if tok != '\n' && tok != _EOF && tok != _EOL {
828 in.Error("syntax error (expected newline after closing paren)")
832 l := in.parseLine(sym)
833 x.Line = append(x.Line, l)
834 l.Comment().Before = comments
840 func (in *input) parseLine(sym *symType) *Line {
843 token := []string{sym.text}
847 case '\n', _EOF, _EOL:
855 token = append(token, sym.text)
870 slashSlash = []byte("//")
871 moduleStr = []byte("module")
874 // ModulePath returns the module path from the gomod file text.
875 // If it cannot find a module path, it returns an empty string.
876 // It is tolerant of unrelated problems in the go.mod file.
877 func ModulePath(mod []byte) string {
881 if i := bytes.IndexByte(line, '\n'); i >= 0 {
882 line, mod = line[:i], line[i+1:]
884 if i := bytes.Index(line, slashSlash); i >= 0 {
887 line = bytes.TrimSpace(line)
888 if !bytes.HasPrefix(line, moduleStr) {
891 line = line[len(moduleStr):]
893 line = bytes.TrimSpace(line)
894 if len(line) == n || len(line) == 0 {
898 if line[0] == '"' || line[0] == '`' {
899 p, err := strconv.Unquote(string(line))
901 return "" // malformed quoted string or multiline module path
908 return "" // missing module path