libgo/go/golang.org/x/mod/modfile/read.go

   1 // Copyright 2018 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Module file parser.
   6 // This is a simplified copy of Google's buildifier parser.
   7
   8 package modfile
   9
  10 import (
  11         "bytes"
  12         "fmt"
  13         "os"
  14         "strconv"
  15         "strings"
  16         "unicode"
  17         "unicode/utf8"
  18 )
  19
  20 // A Position describes an arbitrary source position in a file, including the
  21 // file, line, column, and byte offset.
  22 type Position struct {
  23         Line     int // line in input (starting at 1)
  24         LineRune int // rune in line (starting at 1)
  25         Byte     int // byte in input (starting at 0)
  26 }
  27
  28 // add returns the position at the end of s, assuming it starts at p.
  29 func (p Position) add(s string) Position {
  30         p.Byte += len(s)
  31         if n := strings.Count(s, "\n"); n > 0 {
  32                 p.Line += n
  33                 s = s[strings.LastIndex(s, "\n")+1:]
  34                 p.LineRune = 1
  35         }
  36         p.LineRune += utf8.RuneCountInString(s)
  37         return p
  38 }
  39
// An Expr represents an input element: a syntax node that has a
// source span and can carry attached comments. In this file the
// types *FileSyntax, *CommentBlock, *Line, *LineBlock, *LParen and
// *RParen all provide both methods.
type Expr interface {
	// Span returns the start and end position of the expression,
	// excluding leading or trailing comments.
	Span() (start, end Position)

	// Comment returns the comments attached to the expression.
	// This method would normally be named 'Comments' but that
	// would interfere with embedding a type of the same name.
	Comment() *Comments
}
  51
// A Comment represents a single // comment.
//
// The field order is significant: the lexer builds values of this
// type with an unkeyed composite literal.
type Comment struct {
	Start  Position // position where the comment begins
	Token  string   // without trailing newline
	Suffix bool     // an end of line (not whole line) comment
}
  58
// Comments collects the comments associated with an expression.
// It is embedded in every syntax node type so that each node
// carries its own comment lists.
type Comments struct {
	Before []Comment // whole-line comments before this expression
	Suffix []Comment // end-of-line comments after this expression

	// For top-level expressions only, After lists whole-line
	// comments following the expression.
	After []Comment
}

// Comment returns the receiver. This isn't useful by itself, but
// a Comments struct is embedded into all the expression
// implementation types, and this gives each of those a Comment
// method to satisfy the Expr interface. Because the receiver is
// returned directly, callers can modify the node's comment lists
// through the result.
func (c *Comments) Comment() *Comments {
	return c
}
  76
  77 // A FileSyntax represents an entire go.mod file.
  78 type FileSyntax struct {
  79         Name string // file path
  80         Comments
  81         Stmt []Expr
  82 }
  83
  84 func (x *FileSyntax) Span() (start, end Position) {
  85         if len(x.Stmt) == 0 {
  86                 return
  87         }
  88         start, _ = x.Stmt[0].Span()
  89         _, end = x.Stmt[len(x.Stmt)-1].Span()
  90         return start, end
  91 }
  92
  93 // addLine adds a line containing the given tokens to the file.
  94 //
  95 // If the first token of the hint matches the first token of the
  96 // line, the new line is added at the end of the block containing hint,
  97 // extracting hint into a new block if it is not yet in one.
  98 //
  99 // If the hint is non-nil buts its first token does not match,
 100 // the new line is added after the block containing hint
 101 // (or hint itself, if not in a block).
 102 //
 103 // If no hint is provided, addLine appends the line to the end of
 104 // the last block with a matching first token,
 105 // or to the end of the file if no such block exists.
 106 func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
 107         if hint == nil {
 108                 // If no hint given, add to the last statement of the given type.
 109         Loop:
 110                 for i := len(x.Stmt) - 1; i >= 0; i-- {
 111                         stmt := x.Stmt[i]
 112                         switch stmt := stmt.(type) {
 113                         case *Line:
 114                                 if stmt.Token != nil && stmt.Token[0] == tokens[0] {
 115                                         hint = stmt
 116                                         break Loop
 117                                 }
 118                         case *LineBlock:
 119                                 if stmt.Token[0] == tokens[0] {
 120                                         hint = stmt
 121                                         break Loop
 122                                 }
 123                         }
 124                 }
 125         }
 126
 127         newLineAfter := func(i int) *Line {
 128                 new := &Line{Token: tokens}
 129                 if i == len(x.Stmt) {
 130                         x.Stmt = append(x.Stmt, new)
 131                 } else {
 132                         x.Stmt = append(x.Stmt, nil)
 133                         copy(x.Stmt[i+2:], x.Stmt[i+1:])
 134                         x.Stmt[i+1] = new
 135                 }
 136                 return new
 137         }
 138
 139         if hint != nil {
 140                 for i, stmt := range x.Stmt {
 141                         switch stmt := stmt.(type) {
 142                         case *Line:
 143                                 if stmt == hint {
 144                                         if stmt.Token == nil || stmt.Token[0] != tokens[0] {
 145                                                 return newLineAfter(i)
 146                                         }
 147
 148                                         // Convert line to line block.
 149                                         stmt.InBlock = true
 150                                         block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
 151                                         stmt.Token = stmt.Token[1:]
 152                                         x.Stmt[i] = block
 153                                         new := &Line{Token: tokens[1:], InBlock: true}
 154                                         block.Line = append(block.Line, new)
 155                                         return new
 156                                 }
 157
 158                         case *LineBlock:
 159                                 if stmt == hint {
 160                                         if stmt.Token[0] != tokens[0] {
 161                                                 return newLineAfter(i)
 162                                         }
 163
 164                                         new := &Line{Token: tokens[1:], InBlock: true}
 165                                         stmt.Line = append(stmt.Line, new)
 166                                         return new
 167                                 }
 168
 169                                 for j, line := range stmt.Line {
 170                                         if line == hint {
 171                                                 if stmt.Token[0] != tokens[0] {
 172                                                         return newLineAfter(i)
 173                                                 }
 174
 175                                                 // Add new line after hint within the block.
 176                                                 stmt.Line = append(stmt.Line, nil)
 177                                                 copy(stmt.Line[j+2:], stmt.Line[j+1:])
 178                                                 new := &Line{Token: tokens[1:], InBlock: true}
 179                                                 stmt.Line[j+1] = new
 180                                                 return new
 181                                         }
 182                                 }
 183                         }
 184                 }
 185         }
 186
 187         new := &Line{Token: tokens}
 188         x.Stmt = append(x.Stmt, new)
 189         return new
 190 }
 191
 192 func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
 193         if line.InBlock {
 194                 tokens = tokens[1:]
 195         }
 196         line.Token = tokens
 197 }
 198
// removeLine marks line as deleted by setting its Token to nil.
// The line is not unlinked from the slice that holds it; Cleanup
// drops lines whose Token is nil.
func (x *FileSyntax) removeLine(line *Line) {
	line.Token = nil
}
 202
 203 // Cleanup cleans up the file syntax x after any edit operations.
 204 // To avoid quadratic behavior, removeLine marks the line as dead
 205 // by setting line.Token = nil but does not remove it from the slice
 206 // in which it appears. After edits have all been indicated,
 207 // calling Cleanup cleans out the dead lines.
 208 func (x *FileSyntax) Cleanup() {
 209         w := 0
 210         for _, stmt := range x.Stmt {
 211                 switch stmt := stmt.(type) {
 212                 case *Line:
 213                         if stmt.Token == nil {
 214                                 continue
 215                         }
 216                 case *LineBlock:
 217                         ww := 0
 218                         for _, line := range stmt.Line {
 219                                 if line.Token != nil {
 220                                         stmt.Line[ww] = line
 221                                         ww++
 222                                 }
 223                         }
 224                         if ww == 0 {
 225                                 continue
 226                         }
 227                         if ww == 1 {
 228                                 // Collapse block into single line.
 229                                 line := &Line{
 230                                         Comments: Comments{
 231                                                 Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
 232                                                 Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
 233                                                 After:  commentsAdd(stmt.Line[0].After, stmt.After),
 234                                         },
 235                                         Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
 236                                 }
 237                                 x.Stmt[w] = line
 238                                 w++
 239                                 continue
 240                         }
 241                         stmt.Line = stmt.Line[:ww]
 242                 }
 243                 x.Stmt[w] = stmt
 244                 w++
 245         }
 246         x.Stmt = x.Stmt[:w]
 247 }
 248
 249 func commentsAdd(x, y []Comment) []Comment {
 250         return append(x[:len(x):len(x)], y...)
 251 }
 252
 253 func stringsAdd(x, y []string) []string {
 254         return append(x[:len(x):len(x)], y...)
 255 }
 256
// A CommentBlock represents a top-level block of comments separate
// from any rule. The comment text itself lives in the embedded
// Comments (the parser stores it in Before).
type CommentBlock struct {
	Comments
	Start Position // position of the first comment in the block
}

// Span returns a zero-width span at the block's start position.
func (x *CommentBlock) Span() (start, end Position) {
	return x.Start, x.Start
}
 267
// A Line is a single line of tokens.
//
// A nil Token marks a line that has been removed but not yet
// cleaned out of the slice holding it. For a line inside a
// LineBlock (InBlock true), Token omits the block's leading
// keyword, which is stored on the block instead.
type Line struct {
	Comments
	Start   Position // position of the first token
	Token   []string // tokens on the line; nil once removed
	InBlock bool     // whether the line is a member of a LineBlock
	End     Position // position just past the last token
}

// Span returns the recorded start and end positions of the line.
func (x *Line) Span() (start, end Position) {
	return x.Start, x.End
}
 280
// A LineBlock is a factored block of lines, like
//
//	require (
//		"x"
//		"y"
//	)
//
// The tokens before the opening parenthesis are held in Token, and
// each member line holds only its remaining tokens.
type LineBlock struct {
	Comments
	Start  Position // position of the first token of the block
	LParen LParen   // opening parenthesis
	Token  []string // tokens preceding the opening parenthesis
	Line   []*Line  // lines inside the parentheses
	RParen RParen   // closing parenthesis
}

// Span returns the region from the block's first token through the
// closing parenthesis.
func (x *LineBlock) Span() (start, end Position) {
	return x.Start, x.RParen.Pos.add(")")
}
 300
 301 // An LParen represents the beginning of a parenthesized line block.
 302 // It is a place to store suffix comments.
 303 type LParen struct {
 304         Comments
 305         Pos Position
 306 }
 307
 308 func (x *LParen) Span() (start, end Position) {
 309         return x.Pos, x.Pos.add(")")
 310 }
 311
// An RParen represents the end of a parenthesized line block.
// It is a place to store whole-line (before) comments.
type RParen struct {
	Comments
	Pos Position // position of the ")" character
}

// Span returns the one-character region occupied by the closing
// parenthesis.
func (x *RParen) Span() (start, end Position) {
	return x.Pos, x.Pos.add(")")
}
 322
// An input represents a single input file being parsed.
// It bundles the lexer's cursor over the raw bytes, the syntax tree
// under construction, and the scratch lists used when attaching
// comments to syntax nodes.
type input struct {
	// Lexing state.
	filename  string    // name of input file, for errors
	complete  []byte    // entire input
	remaining []byte    // remaining input
	token     []byte    // token being scanned
	lastToken string    // most recently returned token, for error messages
	pos       Position  // current input position
	comments  []Comment // accumulated comments
	endRule   int       // position of end of current rule (NOTE(review): not referenced in the visible part of this file)

	// Parser state.
	file       *FileSyntax // returned top-level syntax tree
	parseError error       // error encountered during parsing

	// Comment assignment state.
	pre  []Expr // all expressions, in preorder traversal
	post []Expr // all expressions, in postorder traversal
}
 343
 344 func newInput(filename string, data []byte) *input {
 345         return &input{
 346                 filename:  filename,
 347                 complete:  data,
 348                 remaining: data,
 349                 pos:       Position{Line: 1, LineRune: 1, Byte: 0},
 350         }
 351 }
 352
 353 // parse parses the input file.
 354 func parse(file string, data []byte) (f *FileSyntax, err error) {
 355         in := newInput(file, data)
 356         // The parser panics for both routine errors like syntax errors
 357         // and for programmer bugs like array index errors.
 358         // Turn both into error returns. Catching bug panics is
 359         // especially important when processing many files.
 360         defer func() {
 361                 if e := recover(); e != nil {
 362                         if e == in.parseError {
 363                                 err = in.parseError
 364                         } else {
 365                                 err = fmt.Errorf("%s:%d:%d: internal error: %v", in.filename, in.pos.Line, in.pos.LineRune, e)
 366                         }
 367                 }
 368         }()
 369
 370         // Invoke the parser.
 371         in.parseFile()
 372         if in.parseError != nil {
 373                 return nil, in.parseError
 374         }
 375         in.file.Name = in.filename
 376
 377         // Assign comments to nearby syntax.
 378         in.assignComments()
 379
 380         return in.file, nil
 381 }
 382
 383 // Error is called to report an error.
 384 // The reason s is often "syntax error".
 385 // Error does not return: it panics.
 386 func (in *input) Error(s string) {
 387         if s == "syntax error" && in.lastToken != "" {
 388                 s += " near " + in.lastToken
 389         }
 390         in.parseError = fmt.Errorf("%s:%d:%d: %v", in.filename, in.pos.Line, in.pos.LineRune, s)
 391         panic(in.parseError)
 392 }
 393
// eof reports whether the input has reached end of file,
// that is, whether no unread bytes remain.
func (in *input) eof() bool {
	return len(in.remaining) == 0
}
 398
 399 // peekRune returns the next rune in the input without consuming it.
 400 func (in *input) peekRune() int {
 401         if len(in.remaining) == 0 {
 402                 return 0
 403         }
 404         r, _ := utf8.DecodeRune(in.remaining)
 405         return int(r)
 406 }
 407
 408 // peekPrefix reports whether the remaining input begins with the given prefix.
 409 func (in *input) peekPrefix(prefix string) bool {
 410         // This is like bytes.HasPrefix(in.remaining, []byte(prefix))
 411         // but without the allocation of the []byte copy of prefix.
 412         for i := 0; i < len(prefix); i++ {
 413                 if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
 414                         return false
 415                 }
 416         }
 417         return true
 418 }
 419
 420 // readRune consumes and returns the next rune in the input.
 421 func (in *input) readRune() int {
 422         if len(in.remaining) == 0 {
 423                 in.Error("internal lexer error: readRune at EOF")
 424         }
 425         r, size := utf8.DecodeRune(in.remaining)
 426         in.remaining = in.remaining[size:]
 427         if r == '\n' {
 428                 in.pos.Line++
 429                 in.pos.LineRune = 1
 430         } else {
 431                 in.pos.LineRune++
 432         }
 433         in.pos.Byte += size
 434         return int(r)
 435 }
 436
// A symType holds the data the lexer reports for one token.
type symType struct {
	pos    Position // position where the token begins
	endPos Position // position just past the end of the token
	text   string   // input text of the token
}
 442
 443 // startToken marks the beginning of the next input token.
 444 // It must be followed by a call to endToken, once the token has
 445 // been consumed using readRune.
 446 func (in *input) startToken(sym *symType) {
 447         in.token = in.remaining
 448         sym.text = ""
 449         sym.pos = in.pos
 450 }
 451
 452 // endToken marks the end of an input token.
 453 // It records the actual token string in sym.text if the caller
 454 // has not done that already.
 455 func (in *input) endToken(sym *symType) {
 456         if sym.text == "" {
 457                 tok := string(in.token[:len(in.token)-len(in.remaining)])
 458                 sym.text = tok
 459                 in.lastToken = sym.text
 460         }
 461         sym.endPos = in.pos
 462 }
 463
 464 // lex is called from the parser to obtain the next input token.
 465 // It returns the token value (either a rune like '+' or a symbolic token _FOR)
 466 // and sets val to the data associated with the token.
 467 // For all our input tokens, the associated data is
 468 // val.Pos (the position where the token begins)
 469 // and val.Token (the input string corresponding to the token).
 470 func (in *input) lex(sym *symType) int {
 471         // Skip past spaces, stopping at non-space or EOF.
 472         countNL := 0 // number of newlines we've skipped past
 473         for !in.eof() {
 474                 // Skip over spaces. Count newlines so we can give the parser
 475                 // information about where top-level blank lines are,
 476                 // for top-level comment assignment.
 477                 c := in.peekRune()
 478                 if c == ' ' || c == '\t' || c == '\r' {
 479                         in.readRune()
 480                         continue
 481                 }
 482
 483                 // Comment runs to end of line.
 484                 if in.peekPrefix("//") {
 485                         in.startToken(sym)
 486
 487                         // Is this comment the only thing on its line?
 488                         // Find the last \n before this // and see if it's all
 489                         // spaces from there to here.
 490                         i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
 491                         suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
 492                         in.readRune()
 493                         in.readRune()
 494
 495                         // Consume comment.
 496                         for len(in.remaining) > 0 && in.readRune() != '\n' {
 497                         }
 498                         in.endToken(sym)
 499
 500                         sym.text = strings.TrimRight(sym.text, "\n")
 501                         in.lastToken = "comment"
 502
 503                         // If we are at top level (not in a statement), hand the comment to
 504                         // the parser as a _COMMENT token. The grammar is written
 505                         // to handle top-level comments itself.
 506                         if !suffix {
 507                                 // Not in a statement. Tell parser about top-level comment.
 508                                 return _COMMENT
 509                         }
 510
 511                         // Otherwise, save comment for later attachment to syntax tree.
 512                         if countNL > 1 {
 513                                 in.comments = append(in.comments, Comment{sym.pos, "", false})
 514                         }
 515                         in.comments = append(in.comments, Comment{sym.pos, sym.text, suffix})
 516                         countNL = 1
 517                         return _EOL
 518                 }
 519
 520                 if in.peekPrefix("/*") {
 521                         in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
 522                 }
 523
 524                 // Found non-space non-comment.
 525                 break
 526         }
 527
 528         // Found the beginning of the next token.
 529         in.startToken(sym)
 530         defer in.endToken(sym)
 531
 532         // End of file.
 533         if in.eof() {
 534                 in.lastToken = "EOF"
 535                 return _EOF
 536         }
 537
 538         // Punctuation tokens.
 539         switch c := in.peekRune(); c {
 540         case '\n':
 541                 in.readRune()
 542                 return c
 543
 544         case '(':
 545                 in.readRune()
 546                 return c
 547
 548         case ')':
 549                 in.readRune()
 550                 return c
 551
 552         case '"', '`': // quoted string
 553                 quote := c
 554                 in.readRune()
 555                 for {
 556                         if in.eof() {
 557                                 in.pos = sym.pos
 558                                 in.Error("unexpected EOF in string")
 559                         }
 560                         if in.peekRune() == '\n' {
 561                                 in.Error("unexpected newline in string")
 562                         }
 563                         c := in.readRune()
 564                         if c == quote {
 565                                 break
 566                         }
 567                         if c == '\\' && quote != '`' {
 568                                 if in.eof() {
 569                                         in.pos = sym.pos
 570                                         in.Error("unexpected EOF in string")
 571                                 }
 572                                 in.readRune()
 573                         }
 574                 }
 575                 in.endToken(sym)
 576                 return _STRING
 577         }
 578
 579         // Checked all punctuation. Must be identifier token.
 580         if c := in.peekRune(); !isIdent(c) {
 581                 in.Error(fmt.Sprintf("unexpected input character %#q", c))
 582         }
 583
 584         // Scan over identifier.
 585         for isIdent(in.peekRune()) {
 586                 if in.peekPrefix("//") {
 587                         break
 588                 }
 589                 if in.peekPrefix("/*") {
 590                         in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
 591                 }
 592                 in.readRune()
 593         }
 594         return _IDENT
 595 }
 596
 597 // isIdent reports whether c is an identifier rune.
 598 // We treat nearly all runes as identifier runes.
 599 func isIdent(c int) bool {
 600         return c != 0 && !unicode.IsSpace(rune(c))
 601 }
 602
 603 // Comment assignment.
 604 // We build two lists of all subexpressions, preorder and postorder.
 605 // The preorder list is ordered by start location, with outer expressions first.
 606 // The postorder list is ordered by end location, with outer expressions last.
 607 // We use the preorder list to assign each whole-line comment to the syntax
 608 // immediately following it, and we use the postorder list to assign each
 609 // end-of-line comment to the syntax immediately preceding it.
 610
 611 // order walks the expression adding it and its subexpressions to the
 612 // preorder and postorder lists.
 613 func (in *input) order(x Expr) {
 614         if x != nil {
 615                 in.pre = append(in.pre, x)
 616         }
 617         switch x := x.(type) {
 618         default:
 619                 panic(fmt.Errorf("order: unexpected type %T", x))
 620         case nil:
 621                 // nothing
 622         case *LParen, *RParen:
 623                 // nothing
 624         case *CommentBlock:
 625                 // nothing
 626         case *Line:
 627                 // nothing
 628         case *FileSyntax:
 629                 for _, stmt := range x.Stmt {
 630                         in.order(stmt)
 631                 }
 632         case *LineBlock:
 633                 in.order(&x.LParen)
 634                 for _, l := range x.Line {
 635                         in.order(l)
 636                 }
 637                 in.order(&x.RParen)
 638         }
 639         if x != nil {
 640                 in.post = append(in.post, x)
 641         }
 642 }
 643
 644 // assignComments attaches comments to nearby syntax.
 645 func (in *input) assignComments() {
 646         const debug = false
 647
 648         // Generate preorder and postorder lists.
 649         in.order(in.file)
 650
 651         // Split into whole-line comments and suffix comments.
 652         var line, suffix []Comment
 653         for _, com := range in.comments {
 654                 if com.Suffix {
 655                         suffix = append(suffix, com)
 656                 } else {
 657                         line = append(line, com)
 658                 }
 659         }
 660
 661         if debug {
 662                 for _, c := range line {
 663                         fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
 664                 }
 665         }
 666
 667         // Assign line comments to syntax immediately following.
 668         for _, x := range in.pre {
 669                 start, _ := x.Span()
 670                 if debug {
 671                         fmt.Printf("pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
 672                 }
 673                 xcom := x.Comment()
 674                 for len(line) > 0 && start.Byte >= line[0].Start.Byte {
 675                         if debug {
 676                                 fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
 677                         }
 678                         xcom.Before = append(xcom.Before, line[0])
 679                         line = line[1:]
 680                 }
 681         }
 682
 683         // Remaining line comments go at end of file.
 684         in.file.After = append(in.file.After, line...)
 685
 686         if debug {
 687                 for _, c := range suffix {
 688                         fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
 689                 }
 690         }
 691
 692         // Assign suffix comments to syntax immediately before.
 693         for i := len(in.post) - 1; i >= 0; i-- {
 694                 x := in.post[i]
 695
 696                 start, end := x.Span()
 697                 if debug {
 698                         fmt.Printf("post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
 699                 }
 700
 701                 // Do not assign suffix comments to end of line block or whole file.
 702                 // Instead assign them to the last element inside.
 703                 switch x.(type) {
 704                 case *FileSyntax:
 705                         continue
 706                 }
 707
 708                 // Do not assign suffix comments to something that starts
 709                 // on an earlier line, so that in
 710                 //
 711                 //      x ( y
 712                 //              z ) // comment
 713                 //
 714                 // we assign the comment to z and not to x ( ... ).
 715                 if start.Line != end.Line {
 716                         continue
 717                 }
 718                 xcom := x.Comment()
 719                 for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
 720                         if debug {
 721                                 fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
 722                         }
 723                         xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
 724                         suffix = suffix[:len(suffix)-1]
 725                 }
 726         }
 727
 728         // We assigned suffix comments in reverse.
 729         // If multiple suffix comments were appended to the same
 730         // expression node, they are now in reverse. Fix that.
 731         for _, x := range in.post {
 732                 reverseComments(x.Comment().Suffix)
 733         }
 734
 735         // Remaining suffix comments go at beginning of file.
 736         in.file.Before = append(in.file.Before, suffix...)
 737 }
 738
 739 // reverseComments reverses the []Comment list.
 740 func reverseComments(list []Comment) {
 741         for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
 742                 list[i], list[j] = list[j], list[i]
 743         }
 744 }
 745
 746 func (in *input) parseFile() {
 747         in.file = new(FileSyntax)
 748         var sym symType
 749         var cb *CommentBlock
 750         for {
 751                 tok := in.lex(&sym)
 752                 switch tok {
 753                 case '\n':
 754                         if cb != nil {
 755                                 in.file.Stmt = append(in.file.Stmt, cb)
 756                                 cb = nil
 757                         }
 758                 case _COMMENT:
 759                         if cb == nil {
 760                                 cb = &CommentBlock{Start: sym.pos}
 761                         }
 762                         com := cb.Comment()
 763                         com.Before = append(com.Before, Comment{Start: sym.pos, Token: sym.text})
 764                 case _EOF:
 765                         if cb != nil {
 766                                 in.file.Stmt = append(in.file.Stmt, cb)
 767                         }
 768                         return
 769                 default:
 770                         in.parseStmt(&sym)
 771                         if cb != nil {
 772                                 in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
 773                                 cb = nil
 774                         }
 775                 }
 776         }
 777 }
 778
 779 func (in *input) parseStmt(sym *symType) {
 780         start := sym.pos
 781         end := sym.endPos
 782         token := []string{sym.text}
 783         for {
 784                 tok := in.lex(sym)
 785                 switch tok {
 786                 case '\n', _EOF, _EOL:
 787                         in.file.Stmt = append(in.file.Stmt, &Line{
 788                                 Start: start,
 789                                 Token: token,
 790                                 End:   end,
 791                         })
 792                         return
 793                 case '(':
 794                         in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, token, sym))
 795                         return
 796                 default:
 797                         token = append(token, sym.text)
 798                         end = sym.endPos
 799                 }
 800         }
 801 }
 802
 803 func (in *input) parseLineBlock(start Position, token []string, sym *symType) *LineBlock {
 804         x := &LineBlock{
 805                 Start:  start,
 806                 Token:  token,
 807                 LParen: LParen{Pos: sym.pos},
 808         }
 809         var comments []Comment
 810         for {
 811                 tok := in.lex(sym)
 812                 switch tok {
 813                 case _EOL:
 814                         // ignore
 815                 case '\n':
 816                         if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
 817                                 comments = append(comments, Comment{})
 818                         }
 819                 case _COMMENT:
 820                         comments = append(comments, Comment{Start: sym.pos, Token: sym.text})
 821                 case _EOF:
 822                         in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
 823                 case ')':
 824                         x.RParen.Before = comments
 825                         x.RParen.Pos = sym.pos
 826                         tok = in.lex(sym)
 827                         if tok != '\n' && tok != _EOF && tok != _EOL {
 828                                 in.Error("syntax error (expected newline after closing paren)")
 829                         }
 830                         return x
 831                 default:
 832                         l := in.parseLine(sym)
 833                         x.Line = append(x.Line, l)
 834                         l.Comment().Before = comments
 835                         comments = nil
 836                 }
 837         }
 838 }
 839
 840 func (in *input) parseLine(sym *symType) *Line {
 841         start := sym.pos
 842         end := sym.endPos
 843         token := []string{sym.text}
 844         for {
 845                 tok := in.lex(sym)
 846                 switch tok {
 847                 case '\n', _EOF, _EOL:
 848                         return &Line{
 849                                 Start:   start,
 850                                 Token:   token,
 851                                 End:     end,
 852                                 InBlock: true,
 853                         }
 854                 default:
 855                         token = append(token, sym.text)
 856                         end = sym.endPos
 857                 }
 858         }
 859 }
 860
 861 const (
 862         _EOF = -(1 + iota)
 863         _EOL
 864         _IDENT
 865         _STRING
 866         _COMMENT
 867 )
 868
 869 var (
 870         slashSlash = []byte("//")
 871         moduleStr  = []byte("module")
 872 )
 873
 874 // ModulePath returns the module path from the gomod file text.
 875 // If it cannot find a module path, it returns an empty string.
 876 // It is tolerant of unrelated problems in the go.mod file.
 877 func ModulePath(mod []byte) string {
 878         for len(mod) > 0 {
 879                 line := mod
 880                 mod = nil
 881                 if i := bytes.IndexByte(line, '\n'); i >= 0 {
 882                         line, mod = line[:i], line[i+1:]
 883                 }
 884                 if i := bytes.Index(line, slashSlash); i >= 0 {
 885                         line = line[:i]
 886                 }
 887                 line = bytes.TrimSpace(line)
 888                 if !bytes.HasPrefix(line, moduleStr) {
 889                         continue
 890                 }
 891                 line = line[len(moduleStr):]
 892                 n := len(line)
 893                 line = bytes.TrimSpace(line)
 894                 if len(line) == n || len(line) == 0 {
 895                         continue
 896                 }
 897
 898                 if line[0] == '"' || line[0] == '`' {
 899                         p, err := strconv.Unquote(string(line))
 900                         if err != nil {
 901                                 return "" // malformed quoted string or multiline module path
 902                         }
 903                         return p
 904                 }
 905
 906                 return string(line)
 907         }
 908         return "" // missing module path
 909 }