libgo/go/go/printer/printer.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // The printer package implements printing of AST nodes.
   6 package printer
   7
   8 import (
   9         "bytes"
  10         "fmt"
  11         "go/ast"
  12         "go/token"
  13         "io"
  14         "os"
  15         "path"
  16         "runtime"
  17         "tabwriter"
  18 )
  19
  20
// debug turns on internal diagnostics: when true, internalError prints its
// message and panics, and writeItem emits a [file:line:col] marker in front
// of every item it writes.
const debug = false // enable for debugging
  22
  23
// whiteSpace is one entry of the printer's buffer of pending whitespace.
// Besides literal whitespace characters it also encodes indentation
// changes and a placeholder for entries that were cancelled.
type whiteSpace int

const (
	ignore   = whiteSpace(0)    // cancelled entry; skipped by writeWhitespace
	blank    = whiteSpace(' ')  // written as a single blank
	vtab     = whiteSpace('\v') // written as a vertical tab
	newline  = whiteSpace('\n') // written as a line break
	formfeed = whiteSpace('\f') // written as a form feed
	indent   = whiteSpace('>')  // not written; writeWhitespace increments p.indent
	unindent = whiteSpace('<')  // not written; writeWhitespace decrements p.indent
)
  35
  36
// Byte sequences that are written frequently; kept as package-level
// slices so they can be passed to write/write0 directly.
var (
	esc       = []byte{tabwriter.Escape}     // brackets comment lines (see writeCommentLine)
	htab      = []byte{'\t'}                 // separator in front of a comment (see writeCommentPrefix)
	htabs     = []byte("\t\t\t\t\t\t\t\t")   // source of indentation tabs (see write)
	newlines  = []byte("\n\n\n\n\n\n\n\n") // more than the max determined by nlines
	formfeeds = []byte("\f\f\f\f\f\f\f\f") // more than the max determined by nlines

	// HTML escape sequences used by write when GenHTML is set.
	esc_quot = []byte("&#34;") // shorter than "&quot;"
	esc_apos = []byte("&#39;") // shorter than "&apos;"
	esc_amp  = []byte("&amp;")
	esc_lt   = []byte("&lt;")
	esc_gt   = []byte("&gt;")
)
  50
  51
// Special positions
var noPos token.Position // use noPos when a position is needed but not known (zero value)

// infinity is the large constant 1<<30.
// NOTE(review): its uses are outside the part of the file shown here - confirm intent there.
var infinity = 1 << 30
  55
  56
// Use ignoreMultiLine if the multiLine information is not important.
// It points to a freshly allocated bool that is shared by all users of
// this variable.
var ignoreMultiLine = new(bool)
  59
  60
// A printer bundles the output destination, the configuration, and all
// mutable state (position, indentation, pending whitespace, comments)
// used while printing.
type printer struct {
	// Configuration (does not change after initialization)
	output io.Writer
	Config
	errors chan os.Error // receives the error of a failed write (see write0)

	// Current state
	nesting int         // nesting level (0: top-level (package scope), >0: functions/decls.)
	written int         // number of bytes written
	indent  int         // current indentation
	escape  bool        // true if in escape sequence
	lastTok token.Token // the last token printed (token.ILLEGAL if it's whitespace)

	// Buffered whitespace
	buffer []whiteSpace

	// The (possibly estimated) position in the generated output;
	// in AST space (i.e., pos is set whenever a token position is
	// known accurately, and updated depending on what has been
	// written).
	pos token.Position

	// The value of pos immediately after the last item has been
	// written using writeItem.
	last token.Position

	// HTML support
	lastTaggedLine int // last line for which a line tag was written

	// The list of all source comments, in order of appearance.
	comments        []*ast.CommentGroup // may be nil
	cindex          int                 // current comment index
	useNodeComments bool                // if not set, ignore lead and line comments of nodes
}
  95
  96
  97 func (p *printer) init(output io.Writer, cfg *Config) {
  98         p.output = output
  99         p.Config = *cfg
 100         p.errors = make(chan os.Error)
 101         p.buffer = make([]whiteSpace, 0, 16) // whitespace sequences are short
 102 }
 103
 104
 105 func (p *printer) internalError(msg ...interface{}) {
 106         if debug {
 107                 fmt.Print(p.pos.String() + ": ")
 108                 fmt.Println(msg...)
 109                 panic("go/printer")
 110         }
 111 }
 112
 113
 114 // nlines returns the adjusted number of linebreaks given the desired number
 115 // of breaks n such that min <= result <= max where max depends on the current
 116 // nesting level.
 117 //
 118 func (p *printer) nlines(n, min int) int {
 119         if n < min {
 120                 return min
 121         }
 122         max := 3 // max. number of newlines at the top level (p.nesting == 0)
 123         if p.nesting > 0 {
 124                 max = 2 // max. number of newlines everywhere else
 125         }
 126         if n > max {
 127                 return max
 128         }
 129         return n
 130 }
 131
 132
 133 // write0 writes raw (uninterpreted) data to p.output and handles errors.
 134 // write0 does not indent after newlines, and does not HTML-escape or update p.pos.
 135 //
 136 func (p *printer) write0(data []byte) {
 137         n, err := p.output.Write(data)
 138         p.written += n
 139         if err != nil {
 140                 p.errors <- err
 141                 runtime.Goexit()
 142         }
 143 }
 144
 145
// write interprets data and writes it to p.output. It inserts indentation
// after a line break unless in a tabwriter escape sequence, and it HTML-
// escapes characters if GenHTML is set. It updates p.pos as a side-effect.
//
// data is emitted in segments: i0 is the index of the first byte that has
// not been handed to write0 yet. A segment ends at each line break and,
// in HTML mode, in front of each character that needs escaping; whatever
// is left after the loop is written as the final segment.
//
func (p *printer) write(data []byte) {
	i0 := 0
	for i, b := range data {
		switch b {
		case '\n', '\f':
			// write segment ending in b
			p.write0(data[i0 : i+1])

			// update p.pos
			p.pos.Offset += i + 1 - i0
			p.pos.Line++
			p.pos.Column = 1

			if !p.escape {
				// write indentation
				// use "hard" htabs - indentation columns
				// must not be discarded by the tabwriter
				// (emit full chunks of len(htabs) tabs first,
				// then the remaining j tabs)
				j := p.indent
				for ; j > len(htabs); j -= len(htabs) {
					p.write0(htabs)
				}
				p.write0(htabs[0:j])

				// update p.pos
				p.pos.Offset += p.indent
				p.pos.Column += p.indent
			}

			// next segment start
			i0 = i + 1

		case '"', '\'', '&', '<', '>':
			// only relevant in HTML mode; otherwise the character
			// simply stays part of the current segment
			if p.Mode&GenHTML != 0 {
				// write segment ending in b
				// (b itself is excluded; its escaped form follows)
				p.write0(data[i0:i])

				// write HTML-escaped b
				var esc []byte
				switch b {
				case '"':
					esc = esc_quot
				case '\'':
					esc = esc_apos
				case '&':
					esc = esc_amp
				case '<':
					esc = esc_lt
				case '>':
					esc = esc_gt
				}
				p.write0(esc)

				// update p.pos
				// (advance by the number of input bytes consumed,
				// not by the length of the escaped output)
				d := i + 1 - i0
				p.pos.Offset += d
				p.pos.Column += d

				// next segment start
				i0 = i + 1
			}

		case tabwriter.Escape:
			p.escape = !p.escape

			// ignore escape chars introduced by printer - they are
			// invisible and must not affect p.pos (was issue #1089)
			// (the byte remains part of a segment whose length is
			// added to p.pos later, so compensate here)
			p.pos.Offset--
			p.pos.Column--
		}
	}

	// write remaining segment
	p.write0(data[i0:])

	// update p.pos
	d := len(data) - i0
	p.pos.Offset += d
	p.pos.Column += d
}
 229
 230
 231 func (p *printer) writeNewlines(n int, useFF bool) {
 232         if n > 0 {
 233                 n = p.nlines(n, 0)
 234                 if useFF {
 235                         p.write(formfeeds[0:n])
 236                 } else {
 237                         p.write(newlines[0:n])
 238                 }
 239         }
 240 }
 241
 242
 243 func (p *printer) writeTaggedItem(data []byte, tag HTMLTag) {
 244         // write start tag, if any
 245         // (no html-escaping and no p.pos update for tags - use write0)
 246         if tag.Start != "" {
 247                 p.write0([]byte(tag.Start))
 248         }
 249         p.write(data)
 250         // write end tag, if any
 251         if tag.End != "" {
 252                 p.write0([]byte(tag.End))
 253         }
 254 }
 255
 256
 257 // writeItem writes data at position pos. data is the text corresponding to
 258 // a single lexical token, but may also be comment text. pos is the actual
 259 // (or at least very accurately estimated) position of the data in the original
 260 // source text. If tags are present and GenHTML is set, the tags are written
 261 // before and after the data. writeItem updates p.last to the position
 262 // immediately following the data.
 263 //
 264 func (p *printer) writeItem(pos token.Position, data []byte, tag HTMLTag) {
 265         fileChanged := false
 266         if pos.IsValid() {
 267                 // continue with previous position if we don't have a valid pos
 268                 if p.last.IsValid() && p.last.Filename != pos.Filename {
 269                         // the file has changed - reset state
 270                         // (used when printing merged ASTs of different files
 271                         // e.g., the result of ast.MergePackageFiles)
 272                         p.indent = 0
 273                         p.escape = false
 274                         p.buffer = p.buffer[0:0]
 275                         fileChanged = true
 276                 }
 277                 p.pos = pos
 278         }
 279         if debug {
 280                 // do not update p.pos - use write0
 281                 _, filename := path.Split(pos.Filename)
 282                 p.write0([]byte(fmt.Sprintf("[%s:%d:%d]", filename, pos.Line, pos.Column)))
 283         }
 284         if p.Mode&GenHTML != 0 {
 285                 // write line tag if on a new line
 286                 // TODO(gri): should write line tags on each line at the start
 287                 //            will be more useful (e.g. to show line numbers)
 288                 if p.Styler != nil && (pos.Line != p.lastTaggedLine || fileChanged) {
 289                         p.writeTaggedItem(p.Styler.LineTag(pos.Line))
 290                         p.lastTaggedLine = pos.Line
 291                 }
 292                 p.writeTaggedItem(data, tag)
 293         } else {
 294                 p.write(data)
 295         }
 296         p.last = p.pos
 297 }
 298
 299
// writeCommentPrefix writes the whitespace before a comment.
// If there is any pending whitespace, it consumes as much of
// it as is likely to help position the comment nicely.
// pos is the comment position, next the position of the item
// after all pending comments, isFirst indicates if this is the
// first comment in a group of comments, and isKeyword indicates
// if the next item is a keyword.
//
// In both scans of p.buffer below, j ends up as the index of the
// entry that stopped the scan; the entries before it are flushed
// with writeWhitespace(j). If the scan runs through the entire
// buffer without stopping, j stays 0 and nothing is flushed here.
//
func (p *printer) writeCommentPrefix(pos, next token.Position, isFirst, isKeyword bool) {
	if !p.last.IsValid() {
		// there was no preceding item and the comment is the
		// first item to be printed - don't write any whitespace
		return
	}

	if pos.IsValid() && pos.Filename != p.last.Filename {
		// comment in a different file - separate with newlines (writeNewlines will limit the number)
		p.writeNewlines(10, true)
		return
	}

	if pos.IsValid() && pos.Line == p.last.Line {
		// comment on the same line as last item:
		// separate with at least one separator
		hasSep := false
		if isFirst {
			j := 0
			for i, ch := range p.buffer {
				switch ch {
				case blank:
					// ignore any blanks before a comment
					p.buffer[i] = ignore
					continue
				case vtab:
					// respect existing tabs - important
					// for proper formatting of commented structs
					hasSep = true
					continue
				case indent:
					// apply pending indentation
					continue
				}
				// any other entry ends the scan
				j = i
				break
			}
			p.writeWhitespace(j)
		}
		// make sure there is at least one separator
		if !hasSep {
			if pos.Line == next.Line {
				// next item is on the same line as the comment
				// (which must be a /*-style comment): separate
				// with a blank instead of a tab
				p.write([]byte{' '})
			} else {
				p.write(htab)
			}
		}

	} else {
		// comment on a different line:
		// separate with at least one line break
		if isFirst {
			j := 0
			for i, ch := range p.buffer {
				switch ch {
				case blank, vtab:
					// ignore any horizontal whitespace before line breaks
					p.buffer[i] = ignore
					continue
				case indent:
					// apply pending indentation
					continue
				case unindent:
					// if the next token is a keyword, apply the outdent
					// if it appears that the comment is aligned with the
					// keyword; otherwise assume the outdent is part of a
					// closing block and stop (this scenario appears with
					// comments before a case label where the comments
					// apply to the next case instead of the current one)
					if isKeyword && pos.Column == next.Column {
						continue
					}
				case newline, formfeed:
					// TODO(gri): may want to keep formfeed info in some cases
					// (the entry is cancelled and the scan stops here; the
					// line breaks are written by writeNewlines below)
					p.buffer[i] = ignore
				}
				// reached for unindent (not applied), newline, formfeed,
				// and any entry not listed above: end the scan
				j = i
				break
			}
			p.writeWhitespace(j)
		}
		// use formfeeds to break columns before a comment;
		// this is analogous to using formfeeds to separate
		// individual lines of /*-style comments
		// (if !pos.IsValid(), pos.Line == 0, and this will
		// print no newlines)
		p.writeNewlines(pos.Line-p.last.Line, true)
	}
}
 400
 401
 402 func (p *printer) writeCommentLine(comment *ast.Comment, pos token.Position, line []byte) {
 403         // line must pass through unchanged, bracket it with tabwriter.Escape
 404         line = bytes.Join([][]byte{esc, line, esc}, nil)
 405
 406         // apply styler, if any
 407         var tag HTMLTag
 408         if p.Styler != nil {
 409                 line, tag = p.Styler.Comment(comment, line)
 410         }
 411
 412         p.writeItem(pos, line, tag)
 413 }
 414
 415
 416 // TODO(gri): Similar (but not quite identical) functionality for
 417 //            comment processing can be found in go/doc/comment.go.
 418 //            Perhaps this can be factored eventually.
 419
 420 // Split comment text into lines
 421 func split(text []byte) [][]byte {
 422         // count lines (comment text never ends in a newline)
 423         n := 1
 424         for _, c := range text {
 425                 if c == '\n' {
 426                         n++
 427                 }
 428         }
 429
 430         // split
 431         lines := make([][]byte, n)
 432         n = 0
 433         i := 0
 434         for j, c := range text {
 435                 if c == '\n' {
 436                         lines[n] = text[i:j] // exclude newline
 437                         i = j + 1            // discard newline
 438                         n++
 439                 }
 440         }
 441         lines[n] = text[i:]
 442
 443         return lines
 444 }
 445
 446
 447 func isBlank(s []byte) bool {
 448         for _, b := range s {
 449                 if b > ' ' {
 450                         return false
 451                 }
 452         }
 453         return true
 454 }
 455
 456
 457 func commonPrefix(a, b []byte) []byte {
 458         i := 0
 459         for i < len(a) && i < len(b) && a[i] == b[i] && (a[i] <= ' ' || a[i] == '*') {
 460                 i++
 461         }
 462         return a[0:i]
 463 }
 464
 465
 466 func stripCommonPrefix(lines [][]byte) {
 467         if len(lines) < 2 {
 468                 return // at most one line - nothing to do
 469         }
 470         // len(lines) >= 2
 471
 472         // The heuristic in this function tries to handle a few
 473         // common patterns of /*-style comments: Comments where
 474         // the opening /* and closing */ are aligned and the
 475         // rest of the comment text is aligned and indented with
 476         // blanks or tabs, cases with a vertical "line of stars"
 477         // on the left, and cases where the closing */ is on the
 478         // same line as the last comment text.
 479
 480         // Compute maximum common white prefix of all but the first,
 481         // last, and blank lines, and replace blank lines with empty
 482         // lines (the first line starts with /* and has no prefix).
 483         // In case of two-line comments, consider the last line for
 484         // the prefix computation since otherwise the prefix would
 485         // be empty.
 486         //
 487         // Note that the first and last line are never empty (they
 488         // contain the opening /* and closing */ respectively) and
 489         // thus they can be ignored by the blank line check.
 490         var prefix []byte
 491         if len(lines) > 2 {
 492                 for i, line := range lines[1 : len(lines)-1] {
 493                         switch {
 494                         case isBlank(line):
 495                                 lines[1+i] = nil // range starts at line 1
 496                         case prefix == nil:
 497                                 prefix = commonPrefix(line, line)
 498                         default:
 499                                 prefix = commonPrefix(prefix, line)
 500                         }
 501                 }
 502         } else { // len(lines) == 2
 503                 line := lines[1]
 504                 prefix = commonPrefix(line, line)
 505         }
 506
 507         /*
 508          * Check for vertical "line of stars" and correct prefix accordingly.
 509          */
 510         lineOfStars := false
 511         if i := bytes.Index(prefix, []byte{'*'}); i >= 0 {
 512                 // Line of stars present.
 513                 if i > 0 && prefix[i-1] == ' ' {
 514                         i-- // remove trailing blank from prefix so stars remain aligned
 515                 }
 516                 prefix = prefix[0:i]
 517                 lineOfStars = true
 518         } else {
 519                 // No line of stars present.
 520                 // Determine the white space on the first line after the /*
 521                 // and before the beginning of the comment text, assume two
 522                 // blanks instead of the /* unless the first character after
 523                 // the /* is a tab. If the first comment line is empty but
 524                 // for the opening /*, assume up to 3 blanks or a tab. This
 525                 // whitespace may be found as suffix in the common prefix.
 526                 first := lines[0]
 527                 if isBlank(first[2:]) {
 528                         // no comment text on the first line:
 529                         // reduce prefix by up to 3 blanks or a tab
 530                         // if present - this keeps comment text indented
 531                         // relative to the /* and */'s if it was indented
 532                         // in the first place
 533                         i := len(prefix)
 534                         for n := 0; n < 3 && i > 0 && prefix[i-1] == ' '; n++ {
 535                                 i--
 536                         }
 537                         if i == len(prefix) && i > 0 && prefix[i-1] == '\t' {
 538                                 i--
 539                         }
 540                         prefix = prefix[0:i]
 541                 } else {
 542                         // comment text on the first line
 543                         suffix := make([]byte, len(first))
 544                         n := 2 // start after opening /*
 545                         for n < len(first) && first[n] <= ' ' {
 546                                 suffix[n] = first[n]
 547                                 n++
 548                         }
 549                         if n > 2 && suffix[2] == '\t' {
 550                                 // assume the '\t' compensates for the /*
 551                                 suffix = suffix[2:n]
 552                         } else {
 553                                 // otherwise assume two blanks
 554                                 suffix[0], suffix[1] = ' ', ' '
 555                                 suffix = suffix[0:n]
 556                         }
 557                         // Shorten the computed common prefix by the length of
 558                         // suffix, if it is found as suffix of the prefix.
 559                         if bytes.HasSuffix(prefix, suffix) {
 560                                 prefix = prefix[0 : len(prefix)-len(suffix)]
 561                         }
 562                 }
 563         }
 564
 565         // Handle last line: If it only contains a closing */, align it
 566         // with the opening /*, otherwise align the text with the other
 567         // lines.
 568         last := lines[len(lines)-1]
 569         closing := []byte("*/")
 570         i := bytes.Index(last, closing)
 571         if isBlank(last[0:i]) {
 572                 // last line only contains closing */
 573                 var sep []byte
 574                 if lineOfStars {
 575                         // insert an aligning blank
 576                         sep = []byte{' '}
 577                 }
 578                 lines[len(lines)-1] = bytes.Join([][]byte{prefix, closing}, sep)
 579         } else {
 580                 // last line contains more comment text - assume
 581                 // it is aligned like the other lines
 582                 prefix = commonPrefix(prefix, last)
 583         }
 584
 585         // Remove the common prefix from all but the first and empty lines.
 586         for i, line := range lines[1:] {
 587                 if len(line) != 0 {
 588                         lines[1+i] = line[len(prefix):] // range starts at line 1
 589                 }
 590         }
 591 }
 592
 593
 594 func (p *printer) writeComment(comment *ast.Comment) {
 595         text := comment.Text
 596
 597         // shortcut common case of //-style comments
 598         if text[1] == '/' {
 599                 p.writeCommentLine(comment, comment.Pos(), text)
 600                 return
 601         }
 602
 603         // for /*-style comments, print line by line and let the
 604         // write function take care of the proper indentation
 605         lines := split(text)
 606         stripCommonPrefix(lines)
 607
 608         // write comment lines, separated by formfeed,
 609         // without a line break after the last line
 610         linebreak := formfeeds[0:1]
 611         pos := comment.Pos()
 612         for i, line := range lines {
 613                 if i > 0 {
 614                         p.write(linebreak)
 615                         pos = p.pos
 616                 }
 617                 if len(line) > 0 {
 618                         p.writeCommentLine(comment, pos, line)
 619                 }
 620         }
 621 }
 622
 623
 624 // writeCommentSuffix writes a line break after a comment if indicated
 625 // and processes any leftover indentation information. If a line break
 626 // is needed, the kind of break (newline vs formfeed) depends on the
 627 // pending whitespace. writeCommentSuffix returns true if a pending
 628 // formfeed was dropped from the whitespace buffer.
 629 //
 630 func (p *printer) writeCommentSuffix(needsLinebreak bool) (droppedFF bool) {
 631         for i, ch := range p.buffer {
 632                 switch ch {
 633                 case blank, vtab:
 634                         // ignore trailing whitespace
 635                         p.buffer[i] = ignore
 636                 case indent, unindent:
 637                         // don't loose indentation information
 638                 case newline, formfeed:
 639                         // if we need a line break, keep exactly one
 640                         // but remember if we dropped any formfeeds
 641                         if needsLinebreak {
 642                                 needsLinebreak = false
 643                         } else {
 644                                 if ch == formfeed {
 645                                         droppedFF = true
 646                                 }
 647                                 p.buffer[i] = ignore
 648                         }
 649                 }
 650         }
 651         p.writeWhitespace(len(p.buffer))
 652
 653         // make sure we have a line break
 654         if needsLinebreak {
 655                 p.write([]byte{'\n'})
 656         }
 657
 658         return
 659 }
 660
 661
 662 // intersperseComments consumes all comments that appear before the next token
 663 // tok and prints it together with the buffered whitespace (i.e., the whitespace
 664 // that needs to be written before the next token). A heuristic is used to mix
 665 // the comments and whitespace. intersperseComments returns true if a pending
 666 // formfeed was dropped from the whitespace buffer.
 667 //
 668 func (p *printer) intersperseComments(next token.Position, tok token.Token) (droppedFF bool) {
 669         var last *ast.Comment
 670         for ; p.commentBefore(next); p.cindex++ {
 671                 for _, c := range p.comments[p.cindex].List {
 672                         p.writeCommentPrefix(c.Pos(), next, last == nil, tok.IsKeyword())
 673                         p.writeComment(c)
 674                         last = c
 675                 }
 676         }
 677
 678         if last != nil {
 679                 if last.Text[1] == '*' && last.Pos().Line == next.Line {
 680                         // the last comment is a /*-style comment and the next item
 681                         // follows on the same line: separate with an extra blank
 682                         p.write([]byte{' '})
 683                 }
 684                 // ensure that there is a newline after a //-style comment
 685                 // or if we are before a closing '}' or at the end of a file
 686                 return p.writeCommentSuffix(last.Text[1] == '/' || tok == token.RBRACE || tok == token.EOF)
 687         }
 688
 689         // no comment was written - we should never reach here since
 690         // intersperseComments should not be called in that case
 691         p.internalError("intersperseComments called without pending comments")
 692         return false
 693 }
 694
 695
// writeWhitespace writes the first n whitespace entries of p.buffer and
// removes them from the buffer. Entries marked ignore are skipped, indent
// and unindent entries adjust p.indent instead of producing output, and
// all other entries are written as a single byte via write.
func (p *printer) writeWhitespace(n int) {
	// write entries
	var data [1]byte
	for i := 0; i < n; i++ {
		switch ch := p.buffer[i]; ch {
		case ignore:
			// ignore!
		case indent:
			p.indent++
		case unindent:
			p.indent--
			if p.indent < 0 {
				// report (debug mode only) and recover
				p.internalError("negative indentation:", p.indent)
				p.indent = 0
			}
		case newline, formfeed:
			// A line break immediately followed by a "correcting"
			// unindent is swapped with the unindent - this permits
			// proper label positioning. If a comment is between
			// the line break and the label, the unindent is not
			// part of the comment whitespace prefix and the comment
			// will be positioned correctly indented.
			if i+1 < n && p.buffer[i+1] == unindent {
				// Use a formfeed to terminate the current section.
				// Otherwise, a long label name on the next line leading
				// to a wide column may increase the indentation column
				// of lines before the label; effectively leading to wrong
				// indentation.
				p.buffer[i], p.buffer[i+1] = unindent, formfeed
				// the loop's i++ cancels this decrement, so entry i
				// (now the unindent) is processed next
				i-- // do it again
				continue
			}
			fallthrough
		default:
			data[0] = byte(ch)
			p.write(data[0:])
		}
	}

	// shift remaining entries down
	// (n now serves as the read index, i as the write index)
	i := 0
	for ; n < len(p.buffer); n++ {
		p.buffer[i] = p.buffer[n]
		i++
	}
	p.buffer = p.buffer[0:i]
}
 744
 745
 746 // ----------------------------------------------------------------------------
 747 // Printing interface
 748
 749
 750 func mayCombine(prev token.Token, next byte) (b bool) {
 751         switch prev {
 752         case token.INT:
 753                 b = next == '.' // 1.
 754         case token.ADD:
 755                 b = next == '+' // ++
 756         case token.SUB:
 757                 b = next == '-' // --
 758         case token.QUO:
 759                 b = next == '*' // /*
 760         case token.LSS:
 761                 b = next == '-' || next == '<' // <- or <<
 762         case token.AND:
 763                 b = next == '&' || next == '^' // && or &^
 764         }
 765         return
 766 }
 767
 768
// print prints a list of "items" (roughly corresponding to syntactic
// tokens, but also including whitespace and formatting information).
// It is the only print function that should be called directly from
// any of the AST printing functions in nodes.go.
//
// Whitespace is accumulated until a non-whitespace token appears. Any
// comments that need to appear before that token are printed first,
// taking into account the amount and structure of any pending white-
// space for best comment placement. Then, any leftover whitespace is
// printed, followed by the actual token.
//
// Each argument must be a whiteSpace value, an *ast.Ident, an
// *ast.BasicLit, a token.Token, or a token.Position; any other type
// is reported on os.Stderr and causes a panic.
//
func (p *printer) print(args ...interface{}) {
	for _, f := range args {
		next := p.pos // estimated position of next item
		var data []byte // text to emit for this item; stays nil for whitespace and positions
		var tag HTMLTag // surrounding tag supplied by the Styler, if any
		var tok token.Token // token kind of this item; zero value unless set below

		switch x := f.(type) {
		case whiteSpace:
			if x == ignore {
				// don't add ignore's to the buffer; they
				// may screw up "correcting" unindents (see
				// LabeledStmt)
				break
			}
			// append x to the pending whitespace buffer
			i := len(p.buffer)
			if i == cap(p.buffer) {
				// Whitespace sequences are very short so this should
				// never happen. Handle gracefully (but possibly with
				// bad comment placement) if it does happen.
				p.writeWhitespace(i)
				i = 0
			}
			p.buffer = p.buffer[0 : i+1]
			p.buffer[i] = x
		case *ast.Ident:
			// use the Styler's rendering if one is configured,
			// otherwise the identifier's name
			if p.Styler != nil {
				data, tag = p.Styler.Ident(x)
			} else {
				data = []byte(x.Name)
			}
			tok = token.IDENT
		case *ast.BasicLit:
			if p.Styler != nil {
				data, tag = p.Styler.BasicLit(x)
			} else {
				data = x.Value
			}
			// escape all literals so they pass through unchanged
			// (note that valid Go programs cannot contain esc ('\xff')
			// bytes since they do not appear in legal UTF-8 sequences)
			// TODO(gri): do this more efficiently.
			data = []byte("\xff" + string(data) + "\xff")
			tok = x.Kind
		case token.Token:
			s := x.String()
			if mayCombine(p.lastTok, s[0]) {
				// the previous and the current token must be
				// separated by a blank otherwise they combine
				// into a different incorrect token sequence
				// (except for token.INT followed by a '.' this
				// should never happen because it is taken care
				// of via binary expression formatting)
				if len(p.buffer) != 0 {
					p.internalError("whitespace buffer not empty")
				}
				// the whitespace buffer now holds exactly one blank
				p.buffer = p.buffer[0:1]
				p.buffer[0] = ' '
			}
			if p.Styler != nil {
				data, tag = p.Styler.Token(x)
			} else {
				data = []byte(s)
			}
			tok = x
		case token.Position:
			// a position produces no output; it only refines the
			// estimate of where the next item is located
			if x.IsValid() {
				next = x // accurate position of next item
			}
			// keep lastTok unchanged across position arguments
			tok = p.lastTok
		default:
			fmt.Fprintf(os.Stderr, "print: unsupported argument type %T\n", f)
			panic("go/printer type")
		}
		// record the state for the next item; for whitespace items tok
		// is still the zero token here
		p.lastTok = tok
		p.pos = next

		if data != nil {
			// emit pending comments and whitespace before the item
			droppedFF := p.flush(next, tok)

			// intersperse extra newlines if present in the source
			// (don't do this in flush as it will cause extra newlines
			// at the end of a file) - use formfeeds if we dropped one
			// before
			// NOTE(review): p.pos was assigned next just above, so the
			// line difference is non-zero only if flush moved p.pos -
			// confirm against the write functions.
			p.writeNewlines(next.Line-p.pos.Line, droppedFF)

			p.writeItem(next, data, tag)
		}
	}
}
 870
 871
 872 // commentBefore returns true iff the current comment occurs
 873 // before the next position in the source code.
 874 //
 875 func (p *printer) commentBefore(next token.Position) bool {
 876         return p.cindex < len(p.comments) && p.comments[p.cindex].List[0].Pos().Offset < next.Offset
 877 }
 878
 879
 880 // Flush prints any pending comments and whitespace occuring
 881 // textually before the position of the next token tok. Flush
 882 // returns true if a pending formfeed character was dropped
 883 // from the whitespace buffer as a result of interspersing
 884 // comments.
 885 //
 886 func (p *printer) flush(next token.Position, tok token.Token) (droppedFF bool) {
 887         if p.commentBefore(next) {
 888                 // if there are comments before the next item, intersperse them
 889                 droppedFF = p.intersperseComments(next, tok)
 890         } else {
 891                 // otherwise, write any leftover whitespace
 892                 p.writeWhitespace(len(p.buffer))
 893         }
 894         return
 895 }
 896
 897
 898 // ----------------------------------------------------------------------------
 899 // Trimmer
 900
// A trimmer is an io.Writer filter for stripping tabwriter.Escape
// characters, trailing blanks and tabs, and for converting formfeed
// and vtab characters into newlines and htabs (in case no tabwriter
// is used). Text bracketed by tabwriter.Escape characters is passed
// through unchanged.
//
type trimmer struct {
	output io.Writer    // destination of the filtered text
	space  bytes.Buffer // blanks and tabs seen but not yet written; dropped if a line break follows
	state  int          // current state: inSpace, inEscape, or inText
}
 912
 913
// trimmer is implemented as a state machine.
// It can be in one of the following states:
const (
	inSpace  = iota // collecting blanks and tabs that may turn out to be trailing
	inEscape        // inside text bracketed by tabwriter.Escape characters
	inText          // inside ordinary (non-blank, unescaped) text
)
 921
 922
// Design note: It is tempting to eliminate extra blanks occurring in
//              whitespace in this function as it could simplify some
//              of the blanks logic in the node printing functions.
//              However, this would mess up any formatting done by
//              the tabwriter.
 928
 929 func (p *trimmer) Write(data []byte) (n int, err os.Error) {
 930         m := 0 // if p.state != inSpace, data[m:n] is unwritten
 931         var b byte
 932         for n, b = range data {
 933                 if b == '\v' {
 934                         b = '\t' // convert to htab
 935                 }
 936                 switch p.state {
 937                 case inSpace:
 938                         switch b {
 939                         case '\t', ' ':
 940                                 p.space.WriteByte(b) // WriteByte returns no errors
 941                         case '\f', '\n':
 942                                 p.space.Reset()                        // discard trailing space
 943                                 _, err = p.output.Write(newlines[0:1]) // write newline
 944                         case tabwriter.Escape:
 945                                 _, err = p.output.Write(p.space.Bytes())
 946                                 p.space.Reset()
 947                                 p.state = inEscape
 948                                 m = n + 1 // drop tabwriter.Escape
 949                         default:
 950                                 _, err = p.output.Write(p.space.Bytes())
 951                                 p.space.Reset()
 952                                 p.state = inText
 953                                 m = n
 954                         }
 955                 case inEscape:
 956                         if b == tabwriter.Escape {
 957                                 _, err = p.output.Write(data[m:n])
 958                                 p.state = inSpace
 959                         }
 960                 case inText:
 961                         switch b {
 962                         case '\t', ' ':
 963                                 _, err = p.output.Write(data[m:n])
 964                                 p.state = inSpace
 965                                 p.space.WriteByte(b) // WriteByte returns no errors
 966                         case '\f':
 967                                 data[n] = '\n' // convert to newline
 968                         case tabwriter.Escape:
 969                                 _, err = p.output.Write(data[m:n])
 970                                 p.state = inEscape
 971                                 m = n + 1 // drop tabwriter.Escape
 972                         }
 973                 }
 974                 if err != nil {
 975                         return
 976                 }
 977         }
 978         n = len(data)
 979
 980         if p.state != inSpace {
 981                 _, err = p.output.Write(data[m:n])
 982                 p.state = inSpace
 983         }
 984
 985         return
 986 }
 987
 988
 989 // ----------------------------------------------------------------------------
 990 // Public interface
 991
// General printing is controlled with these Config.Mode flags.
// The flags are distinct bits and may be or'ed together.
const (
	GenHTML   uint = 1 << iota // generate HTML
	RawFormat                  // do not use a tabwriter; if set, UseSpaces is ignored
	TabIndent                  // use tabs for indentation independent of UseSpaces
	UseSpaces                  // use spaces instead of tabs for alignment
)
 999
1000
// An HTMLTag specifies a start and end tag.
// Either tag is the empty string if it is absent.
type HTMLTag struct {
	Start string // start tag; empty if absent
	End   string // end tag; empty if absent
}
1005
1006
// A Styler specifies formatting of line tags and elementary Go words.
// A format consists of text and a (possibly empty) surrounding HTML tag.
// If a Config has a non-nil Styler, the printer obtains the text for
// identifiers, basic literals, and tokens from it instead of using
// their own spelling.
//
type Styler interface {
	// LineTag returns the format for the given line number.
	// NOTE(review): the call site is not in this part of the file - confirm usage.
	LineTag(line int) ([]byte, HTMLTag)
	// Comment returns the format for one line of the comment c.
	// NOTE(review): the call site is not in this part of the file - confirm usage.
	Comment(c *ast.Comment, line []byte) ([]byte, HTMLTag)
	// BasicLit returns the format for the literal x; the printer
	// brackets the returned text with escape ('\xff') bytes.
	BasicLit(x *ast.BasicLit) ([]byte, HTMLTag)
	// Ident returns the format for the identifier id.
	Ident(id *ast.Ident) ([]byte, HTMLTag)
	// Token returns the format for the token tok.
	Token(tok token.Token) ([]byte, HTMLTag)
}
1017
1018
// A Config node controls the output of Fprint.
type Config struct {
	Mode     uint   // default: 0; or'ed combination of GenHTML, RawFormat, TabIndent, UseSpaces
	Tabwidth int    // default: 8; tab width handed to the tabwriter (unused if RawFormat is set)
	Styler   Styler // default: nil; if non-nil, supplies the text and tags for printed items
}
1025
1026
1027 // Fprint "pretty-prints" an AST node to output and returns the number
1028 // of bytes written and an error (if any) for a given configuration cfg.
1029 // The node type must be *ast.File, or assignment-compatible to ast.Expr,
1030 // ast.Decl, ast.Spec, or ast.Stmt.
1031 //
1032 func (cfg *Config) Fprint(output io.Writer, node interface{}) (int, os.Error) {
1033         // redirect output through a trimmer to eliminate trailing whitespace
1034         // (Input to a tabwriter must be untrimmed since trailing tabs provide
1035         // formatting information. The tabwriter could provide trimming
1036         // functionality but no tabwriter is used when RawFormat is set.)
1037         output = &trimmer{output: output}
1038
1039         // setup tabwriter if needed and redirect output
1040         var tw *tabwriter.Writer
1041         if cfg.Mode&RawFormat == 0 {
1042                 minwidth := cfg.Tabwidth
1043
1044                 padchar := byte('\t')
1045                 if cfg.Mode&UseSpaces != 0 {
1046                         padchar = ' '
1047                 }
1048
1049                 twmode := tabwriter.DiscardEmptyColumns
1050                 if cfg.Mode&GenHTML != 0 {
1051                         twmode |= tabwriter.FilterHTML
1052                 }
1053                 if cfg.Mode&TabIndent != 0 {
1054                         minwidth = 0
1055                         twmode |= tabwriter.TabIndent
1056                 }
1057
1058                 tw = tabwriter.NewWriter(output, minwidth, cfg.Tabwidth, 1, padchar, twmode)
1059                 output = tw
1060         }
1061
1062         // setup printer and print node
1063         var p printer
1064         p.init(output, cfg)
1065         go func() {
1066                 switch n := node.(type) {
1067                 case ast.Expr:
1068                         p.nesting = 1
1069                         p.useNodeComments = true
1070                         p.expr(n, ignoreMultiLine)
1071                 case ast.Stmt:
1072                         p.nesting = 1
1073                         p.useNodeComments = true
1074                         // A labeled statement will un-indent to position the
1075                         // label. Set indent to 1 so we don't get indent "underflow".
1076                         if _, labeledStmt := n.(*ast.LabeledStmt); labeledStmt {
1077                                 p.indent = 1
1078                         }
1079                         p.stmt(n, false, ignoreMultiLine)
1080                 case ast.Decl:
1081                         p.nesting = 1
1082                         p.useNodeComments = true
1083                         p.decl(n, ignoreMultiLine)
1084                 case ast.Spec:
1085                         p.nesting = 1
1086                         p.useNodeComments = true
1087                         p.spec(n, 1, false, ignoreMultiLine)
1088                 case *ast.File:
1089                         p.nesting = 0
1090                         p.comments = n.Comments
1091                         p.useNodeComments = n.Comments == nil
1092                         p.file(n)
1093                 default:
1094                         p.errors <- fmt.Errorf("printer.Fprint: unsupported node type %T", n)
1095                         runtime.Goexit()
1096                 }
1097                 p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF)
1098                 p.errors <- nil // no errors
1099         }()
1100         err := <-p.errors // wait for completion of goroutine
1101
1102         // flush tabwriter, if any
1103         if tw != nil {
1104                 tw.Flush() // ignore errors
1105         }
1106
1107         return p.written, err
1108 }
1109
1110
1111 // Fprint "pretty-prints" an AST node to output.
1112 // It calls Config.Fprint with default settings.
1113 //
1114 func Fprint(output io.Writer, node interface{}) os.Error {
1115         _, err := (&Config{Tabwidth: 8}).Fprint(output, node) // don't care about number of bytes written
1116         return err
1117 }