1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // The printer package implements printing of AST nodes.
21 const debug = false // enable for debugging
27 ignore = whiteSpace(0)
28 blank = whiteSpace(' ')
29 vtab = whiteSpace('\v')
30 newline = whiteSpace('\n')
31 formfeed = whiteSpace('\f')
32 indent = whiteSpace('>')
33 unindent = whiteSpace('<')
38 esc = []byte{tabwriter.Escape}
40 htabs = []byte("\t\t\t\t\t\t\t\t")
41 newlines = []byte("\n\n\n\n\n\n\n\n") // more than the max determined by nlines
42 formfeeds = []byte("\f\f\f\f\f\f\f\f") // more than the max determined by nlines
44 esc_quot = []byte("&#34;") // shorter than "&quot;"
45 esc_apos = []byte("&#39;") // shorter than "&apos;"
46 esc_amp = []byte("&amp;")
47 esc_lt = []byte("&lt;")
48 esc_gt = []byte("&gt;")
53 var noPos token.Position // use noPos when a position is needed but not known
54 var infinity = 1 << 30
57 // Use ignoreMultiLine if the multiLine information is not important.
58 var ignoreMultiLine = new(bool)
62 // Configuration (does not change after initialization)
68 nesting int // nesting level (0: top-level (package scope), >0: functions/decls.)
69 written int // number of bytes written
70 indent int // current indentation
71 escape bool // true if in escape sequence
72 lastTok token.Token // the last token printed (token.ILLEGAL if it's whitespace)
74 // Buffered whitespace
77 // The (possibly estimated) position in the generated output;
78 // in AST space (i.e., pos is set whenever a token position is
79 // known accurately, and updated depending on what has been
83 // The value of pos immediately after the last item has been
84 // written using writeItem.
88 lastTaggedLine int // last line for which a line tag was written
90 // The list of all source comments, in order of appearance.
91 comments []*ast.CommentGroup // may be nil
92 cindex int // current comment index
93 useNodeComments bool // if not set, ignore lead and line comments of nodes
97 func (p *printer) init(output io.Writer, cfg *Config) {
100 p.errors = make(chan os.Error)
101 p.buffer = make([]whiteSpace, 0, 16) // whitespace sequences are short
105 func (p *printer) internalError(msg ...interface{}) {
107 fmt.Print(p.pos.String() + ": ")
114 // nlines returns the adjusted number of linebreaks given the desired number
115 // of breaks n such that min <= result <= max where max depends on the current
118 func (p *printer) nlines(n, min int) int {
122 max := 3 // max. number of newlines at the top level (p.nesting == 0)
124 max = 2 // max. number of newlines everywhere else
133 // write0 writes raw (uninterpreted) data to p.output and handles errors.
134 // write0 does not indent after newlines, and does not HTML-escape or update p.pos.
136 func (p *printer) write0(data []byte) {
137 n, err := p.output.Write(data)
146 // write interprets data and writes it to p.output. It inserts indentation
147 // after a line break unless in a tabwriter escape sequence, and it HTML-
148 // escapes characters if GenHTML is set. It updates p.pos as a side-effect.
150 func (p *printer) write(data []byte) {
152 for i, b := range data {
155 // write segment ending in b
156 p.write0(data[i0 : i+1])
159 p.pos.Offset += i + 1 - i0
165 // use "hard" htabs - indentation columns
166 // must not be discarded by the tabwriter
168 for ; j > len(htabs); j -= len(htabs) {
174 p.pos.Offset += p.indent
175 p.pos.Column += p.indent
178 // next segment start
181 case '"', '\'', '&', '<', '>':
182 if p.Mode&GenHTML != 0 {
183 // write segment ending in b
186 // write HTML-escaped b
207 // next segment start
211 case tabwriter.Escape:
214 // ignore escape chars introduced by printer - they are
215 // invisible and must not affect p.pos (was issue #1089)
221 // write remaining segment
231 func (p *printer) writeNewlines(n int, useFF bool) {
235 p.write(formfeeds[0:n])
237 p.write(newlines[0:n])
243 func (p *printer) writeTaggedItem(data []byte, tag HTMLTag) {
244 // write start tag, if any
245 // (no html-escaping and no p.pos update for tags - use write0)
247 p.write0([]byte(tag.Start))
250 // write end tag, if any
252 p.write0([]byte(tag.End))
257 // writeItem writes data at position pos. data is the text corresponding to
258 // a single lexical token, but may also be comment text. pos is the actual
259 // (or at least very accurately estimated) position of the data in the original
260 // source text. If tags are present and GenHTML is set, the tags are written
261 // before and after the data. writeItem updates p.last to the position
262 // immediately following the data.
264 func (p *printer) writeItem(pos token.Position, data []byte, tag HTMLTag) {
267 // continue with previous position if we don't have a valid pos
268 if p.last.IsValid() && p.last.Filename != pos.Filename {
269 // the file has changed - reset state
270 // (used when printing merged ASTs of different files
271 // e.g., the result of ast.MergePackageFiles)
274 p.buffer = p.buffer[0:0]
280 // do not update p.pos - use write0
281 _, filename := path.Split(pos.Filename)
282 p.write0([]byte(fmt.Sprintf("[%s:%d:%d]", filename, pos.Line, pos.Column)))
284 if p.Mode&GenHTML != 0 {
285 // write line tag if on a new line
286 // TODO(gri): should write line tags on each line at the start
287 // will be more useful (e.g. to show line numbers)
288 if p.Styler != nil && (pos.Line != p.lastTaggedLine || fileChanged) {
289 p.writeTaggedItem(p.Styler.LineTag(pos.Line))
290 p.lastTaggedLine = pos.Line
292 p.writeTaggedItem(data, tag)
300 // writeCommentPrefix writes the whitespace before a comment.
301 // If there is any pending whitespace, it consumes as much of
302 // it as is likely to help position the comment nicely.
303 // pos is the comment position, next the position of the item
304 // after all pending comments, isFirst indicates if this is the
305 // first comment in a group of comments, and isKeyword indicates
306 // if the next item is a keyword.
308 func (p *printer) writeCommentPrefix(pos, next token.Position, isFirst, isKeyword bool) {
309 if !p.last.IsValid() {
310 // there was no preceding item and the comment is the
311 // first item to be printed - don't write any whitespace
315 if pos.IsValid() && pos.Filename != p.last.Filename {
316 // comment in a different file - separate with newlines (writeNewlines will limit the number)
317 p.writeNewlines(10, true)
321 if pos.IsValid() && pos.Line == p.last.Line {
322 // comment on the same line as last item:
323 // separate with at least one separator
327 for i, ch := range p.buffer {
330 // ignore any blanks before a comment
334 // respect existing tabs - important
335 // for proper formatting of commented structs
339 // apply pending indentation
347 // make sure there is at least one separator
349 if pos.Line == next.Line {
350 // next item is on the same line as the comment
351 // (which must be a /*-style comment): separate
352 // with a blank instead of a tab
360 // comment on a different line:
361 // separate with at least one line break
364 for i, ch := range p.buffer {
367 // ignore any horizontal whitespace before line breaks
371 // apply pending indentation
374 // if the next token is a keyword, apply the outdent
375 // if it appears that the comment is aligned with the
376 // keyword; otherwise assume the outdent is part of a
377 // closing block and stop (this scenario appears with
378 // comments before a case label where the comments
379 // apply to the next case instead of the current one)
380 if isKeyword && pos.Column == next.Column {
383 case newline, formfeed:
384 // TODO(gri): may want to keep formfeed info in some cases
392 // use formfeeds to break columns before a comment;
393 // this is analogous to using formfeeds to separate
394 // individual lines of /*-style comments
395 // (if !pos.IsValid(), pos.Line == 0, and this will
396 // print no newlines)
397 p.writeNewlines(pos.Line-p.last.Line, true)
402 func (p *printer) writeCommentLine(comment *ast.Comment, pos token.Position, line []byte) {
403 // line must pass through unchanged, bracket it with tabwriter.Escape
404 line = bytes.Join([][]byte{esc, line, esc}, nil)
406 // apply styler, if any
409 line, tag = p.Styler.Comment(comment, line)
412 p.writeItem(pos, line, tag)
416 // TODO(gri): Similar (but not quite identical) functionality for
417 // comment processing can be found in go/doc/comment.go.
418 // Perhaps this can be factored eventually.
420 // Split comment text into lines
421 func split(text []byte) [][]byte {
422 // count lines (comment text never ends in a newline)
424 for _, c := range text {
431 lines := make([][]byte, n)
434 for j, c := range text {
436 lines[n] = text[i:j] // exclude newline
437 i = j + 1 // discard newline
447 func isBlank(s []byte) bool {
448 for _, b := range s {
457 func commonPrefix(a, b []byte) []byte {
459 for i < len(a) && i < len(b) && a[i] == b[i] && (a[i] <= ' ' || a[i] == '*') {
466 func stripCommonPrefix(lines [][]byte) {
468 return // at most one line - nothing to do
472 // The heuristic in this function tries to handle a few
473 // common patterns of /*-style comments: Comments where
474 // the opening /* and closing */ are aligned and the
475 // rest of the comment text is aligned and indented with
476 // blanks or tabs, cases with a vertical "line of stars"
477 // on the left, and cases where the closing */ is on the
478 // same line as the last comment text.
480 // Compute maximum common white prefix of all but the first,
481 // last, and blank lines, and replace blank lines with empty
482 // lines (the first line starts with /* and has no prefix).
483 // In case of two-line comments, consider the last line for
484 // the prefix computation since otherwise the prefix would
487 // Note that the first and last line are never empty (they
488 // contain the opening /* and closing */ respectively) and
489 // thus they can be ignored by the blank line check.
492 for i, line := range lines[1 : len(lines)-1] {
495 lines[1+i] = nil // range starts at line 1
497 prefix = commonPrefix(line, line)
499 prefix = commonPrefix(prefix, line)
502 } else { // len(lines) == 2
504 prefix = commonPrefix(line, line)
508 * Check for vertical "line of stars" and correct prefix accordingly.
511 if i := bytes.Index(prefix, []byte{'*'}); i >= 0 {
512 // Line of stars present.
513 if i > 0 && prefix[i-1] == ' ' {
514 i-- // remove trailing blank from prefix so stars remain aligned
519 // No line of stars present.
520 // Determine the white space on the first line after the /*
521 // and before the beginning of the comment text, assume two
522 // blanks instead of the /* unless the first character after
523 // the /* is a tab. If the first comment line is empty but
524 // for the opening /*, assume up to 3 blanks or a tab. This
525 // whitespace may be found as suffix in the common prefix.
527 if isBlank(first[2:]) {
528 // no comment text on the first line:
529 // reduce prefix by up to 3 blanks or a tab
530 // if present - this keeps comment text indented
531 // relative to the /* and */'s if it was indented
532 // in the first place
534 for n := 0; n < 3 && i > 0 && prefix[i-1] == ' '; n++ {
537 if i == len(prefix) && i > 0 && prefix[i-1] == '\t' {
542 // comment text on the first line
543 suffix := make([]byte, len(first))
544 n := 2 // start after opening /*
545 for n < len(first) && first[n] <= ' ' {
549 if n > 2 && suffix[2] == '\t' {
550 // assume the '\t' compensates for the /*
553 // otherwise assume two blanks
554 suffix[0], suffix[1] = ' ', ' '
557 // Shorten the computed common prefix by the length of
558 // suffix, if it is found as suffix of the prefix.
559 if bytes.HasSuffix(prefix, suffix) {
560 prefix = prefix[0 : len(prefix)-len(suffix)]
565 // Handle last line: If it only contains a closing */, align it
566 // with the opening /*, otherwise align the text with the other
568 last := lines[len(lines)-1]
569 closing := []byte("*/")
570 i := bytes.Index(last, closing)
571 if isBlank(last[0:i]) {
572 // last line only contains closing */
575 // insert an aligning blank
578 lines[len(lines)-1] = bytes.Join([][]byte{prefix, closing}, sep)
580 // last line contains more comment text - assume
581 // it is aligned like the other lines
582 prefix = commonPrefix(prefix, last)
585 // Remove the common prefix from all but the first and empty lines.
586 for i, line := range lines[1:] {
588 lines[1+i] = line[len(prefix):] // range starts at line 1
594 func (p *printer) writeComment(comment *ast.Comment) {
597 // shortcut common case of //-style comments
599 p.writeCommentLine(comment, comment.Pos(), text)
603 // for /*-style comments, print line by line and let the
604 // write function take care of the proper indentation
606 stripCommonPrefix(lines)
608 // write comment lines, separated by formfeed,
609 // without a line break after the last line
610 linebreak := formfeeds[0:1]
612 for i, line := range lines {
618 p.writeCommentLine(comment, pos, line)
624 // writeCommentSuffix writes a line break after a comment if indicated
625 // and processes any leftover indentation information. If a line break
626 // is needed, the kind of break (newline vs formfeed) depends on the
627 // pending whitespace. writeCommentSuffix returns true if a pending
628 // formfeed was dropped from the whitespace buffer.
630 func (p *printer) writeCommentSuffix(needsLinebreak bool) (droppedFF bool) {
631 for i, ch := range p.buffer {
634 // ignore trailing whitespace
636 case indent, unindent:
637 // don't lose indentation information
638 case newline, formfeed:
639 // if we need a line break, keep exactly one
640 // but remember if we dropped any formfeeds
642 needsLinebreak = false
651 p.writeWhitespace(len(p.buffer))
653 // make sure we have a line break
655 p.write([]byte{'\n'})
662 // intersperseComments consumes all comments that appear before the next token
663 // tok and prints them together with the buffered whitespace (i.e., the whitespace
664 // that needs to be written before the next token). A heuristic is used to mix
665 // the comments and whitespace. intersperseComments returns true if a pending
666 // formfeed was dropped from the whitespace buffer.
668 func (p *printer) intersperseComments(next token.Position, tok token.Token) (droppedFF bool) {
669 var last *ast.Comment
670 for ; p.commentBefore(next); p.cindex++ {
671 for _, c := range p.comments[p.cindex].List {
672 p.writeCommentPrefix(c.Pos(), next, last == nil, tok.IsKeyword())
679 if last.Text[1] == '*' && last.Pos().Line == next.Line {
680 // the last comment is a /*-style comment and the next item
681 // follows on the same line: separate with an extra blank
684 // ensure that there is a newline after a //-style comment
685 // or if we are before a closing '}' or at the end of a file
686 return p.writeCommentSuffix(last.Text[1] == '/' || tok == token.RBRACE || tok == token.EOF)
689 // no comment was written - we should never reach here since
690 // intersperseComments should not be called in that case
691 p.internalError("intersperseComments called without pending comments")
696 // writeWhitespace writes the first n whitespace entries.
697 func (p *printer) writeWhitespace(n int) {
700 for i := 0; i < n; i++ {
701 switch ch := p.buffer[i]; ch {
709 p.internalError("negative indentation:", p.indent)
712 case newline, formfeed:
713 // A line break immediately followed by a "correcting"
714 // unindent is swapped with the unindent - this permits
715 // proper label positioning. If a comment is between
716 // the line break and the label, the unindent is not
717 // part of the comment whitespace prefix and the comment
718 // will be positioned correctly indented.
719 if i+1 < n && p.buffer[i+1] == unindent {
720 // Use a formfeed to terminate the current section.
721 // Otherwise, a long label name on the next line leading
722 // to a wide column may increase the indentation column
723 // of lines before the label; effectively leading to wrong
725 p.buffer[i], p.buffer[i+1] = unindent, formfeed
736 // shift remaining entries down
738 for ; n < len(p.buffer); n++ {
739 p.buffer[i] = p.buffer[n]
742 p.buffer = p.buffer[0:i]
746 // ----------------------------------------------------------------------------
747 // Printing interface
750 func mayCombine(prev token.Token, next byte) (b bool) {
753 b = next == '.' // 1.
755 b = next == '+' // ++
757 b = next == '-' // --
759 b = next == '*' // /*
761 b = next == '-' || next == '<' // <- or <<
763 b = next == '&' || next == '^' // && or &^
769 // print prints a list of "items" (roughly corresponding to syntactic
770 // tokens, but also including whitespace and formatting information).
771 // It is the only print function that should be called directly from
772 // any of the AST printing functions in nodes.go.
774 // Whitespace is accumulated until a non-whitespace token appears. Any
775 // comments that need to appear before that token are printed first,
776 // taking into account the amount and structure of any pending white-
777 // space for best comment placement. Then, any leftover whitespace is
778 // printed, followed by the actual token.
780 func (p *printer) print(args ...interface{}) {
781 for _, f := range args {
782 next := p.pos // estimated position of next item
787 switch x := f.(type) {
790 // don't add ignore's to the buffer; they
791 // may screw up "correcting" unindents (see
796 if i == cap(p.buffer) {
797 // Whitespace sequences are very short so this should
798 // never happen. Handle gracefully (but possibly with
799 // bad comment placement) if it does happen.
803 p.buffer = p.buffer[0 : i+1]
807 data, tag = p.Styler.Ident(x)
809 data = []byte(x.Name)
814 data, tag = p.Styler.BasicLit(x)
818 // escape all literals so they pass through unchanged
819 // (note that valid Go programs cannot contain esc ('\xff')
820 // bytes since they do not appear in legal UTF-8 sequences)
821 // TODO(gri): do this more efficiently.
822 data = []byte("\xff" + string(data) + "\xff")
826 if mayCombine(p.lastTok, s[0]) {
827 // the previous and the current token must be
828 // separated by a blank otherwise they combine
829 // into a different incorrect token sequence
830 // (except for token.INT followed by a '.' this
831 // should never happen because it is taken care
832 // of via binary expression formatting)
833 if len(p.buffer) != 0 {
834 p.internalError("whitespace buffer not empty")
836 p.buffer = p.buffer[0:1]
840 data, tag = p.Styler.Token(x)
847 next = x // accurate position of next item
851 fmt.Fprintf(os.Stderr, "print: unsupported argument type %T\n", f)
852 panic("go/printer type")
858 droppedFF := p.flush(next, tok)
860 // intersperse extra newlines if present in the source
861 // (don't do this in flush as it will cause extra newlines
862 // at the end of a file) - use formfeeds if we dropped one
864 p.writeNewlines(next.Line-p.pos.Line, droppedFF)
866 p.writeItem(next, data, tag)
872 // commentBefore returns true iff the current comment occurs
873 // before the next position in the source code.
875 func (p *printer) commentBefore(next token.Position) bool {
876 return p.cindex < len(p.comments) && p.comments[p.cindex].List[0].Pos().Offset < next.Offset
880 // flush prints any pending comments and whitespace occurring
881 // textually before the position of the next token tok. flush
882 // returns true if a pending formfeed character was dropped
883 // from the whitespace buffer as a result of interspersing
886 func (p *printer) flush(next token.Position, tok token.Token) (droppedFF bool) {
887 if p.commentBefore(next) {
888 // if there are comments before the next item, intersperse them
889 droppedFF = p.intersperseComments(next, tok)
891 // otherwise, write any leftover whitespace
892 p.writeWhitespace(len(p.buffer))
898 // ----------------------------------------------------------------------------
901 // A trimmer is an io.Writer filter for stripping tabwriter.Escape
902 // characters, trailing blanks and tabs, and for converting formfeed
903 // and vtab characters into newlines and htabs (in case no tabwriter
904 // is used). Text bracketed by tabwriter.Escape characters is passed
905 // through unchanged.
907 type trimmer struct {
914 // trimmer is implemented as a state machine.
915 // It can be in one of the following states:
923 // Design note: It is tempting to eliminate extra blanks occurring in
924 // whitespace in this function as it could simplify some
925 // of the blanks logic in the node printing functions.
926 // However, this would mess up any formatting done by
929 func (p *trimmer) Write(data []byte) (n int, err os.Error) {
930 m := 0 // if p.state != inSpace, data[m:n] is unwritten
932 for n, b = range data {
934 b = '\t' // convert to htab
940 p.space.WriteByte(b) // WriteByte returns no errors
942 p.space.Reset() // discard trailing space
943 _, err = p.output.Write(newlines[0:1]) // write newline
944 case tabwriter.Escape:
945 _, err = p.output.Write(p.space.Bytes())
948 m = n + 1 // drop tabwriter.Escape
950 _, err = p.output.Write(p.space.Bytes())
956 if b == tabwriter.Escape {
957 _, err = p.output.Write(data[m:n])
963 _, err = p.output.Write(data[m:n])
965 p.space.WriteByte(b) // WriteByte returns no errors
967 data[n] = '\n' // convert to newline
968 case tabwriter.Escape:
969 _, err = p.output.Write(data[m:n])
971 m = n + 1 // drop tabwriter.Escape
980 if p.state != inSpace {
981 _, err = p.output.Write(data[m:n])
989 // ----------------------------------------------------------------------------
992 // General printing is controlled with these Config.Mode flags.
994 GenHTML uint = 1 << iota // generate HTML
995 RawFormat // do not use a tabwriter; if set, UseSpaces is ignored
996 TabIndent // use tabs for indentation independent of UseSpaces
997 UseSpaces // use spaces instead of tabs for alignment
1001 // An HTMLTag specifies a start and end tag.
1002 type HTMLTag struct {
1003 Start, End string // empty if tags are absent
1007 // A Styler specifies formatting of line tags and elementary Go words.
1008 // A format consists of text and a (possibly empty) surrounding HTML tag.
1010 type Styler interface {
1011 LineTag(line int) ([]byte, HTMLTag)
1012 Comment(c *ast.Comment, line []byte) ([]byte, HTMLTag)
1013 BasicLit(x *ast.BasicLit) ([]byte, HTMLTag)
1014 Ident(id *ast.Ident) ([]byte, HTMLTag)
1015 Token(tok token.Token) ([]byte, HTMLTag)
1019 // A Config node controls the output of Fprint.
1020 type Config struct {
1021 Mode uint // default: 0
1022 Tabwidth int // default: 8
1023 Styler Styler // default: nil
1027 // Fprint "pretty-prints" an AST node to output and returns the number
1028 // of bytes written and an error (if any) for a given configuration cfg.
1029 // The node type must be *ast.File, or assignment-compatible to ast.Expr,
1030 // ast.Decl, ast.Spec, or ast.Stmt.
1032 func (cfg *Config) Fprint(output io.Writer, node interface{}) (int, os.Error) {
1033 // redirect output through a trimmer to eliminate trailing whitespace
1034 // (Input to a tabwriter must be untrimmed since trailing tabs provide
1035 // formatting information. The tabwriter could provide trimming
1036 // functionality but no tabwriter is used when RawFormat is set.)
1037 output = &trimmer{output: output}
1039 // setup tabwriter if needed and redirect output
1040 var tw *tabwriter.Writer
1041 if cfg.Mode&RawFormat == 0 {
1042 minwidth := cfg.Tabwidth
1044 padchar := byte('\t')
1045 if cfg.Mode&UseSpaces != 0 {
1049 twmode := tabwriter.DiscardEmptyColumns
1050 if cfg.Mode&GenHTML != 0 {
1051 twmode |= tabwriter.FilterHTML
1053 if cfg.Mode&TabIndent != 0 {
1055 twmode |= tabwriter.TabIndent
1058 tw = tabwriter.NewWriter(output, minwidth, cfg.Tabwidth, 1, padchar, twmode)
1062 // setup printer and print node
1066 switch n := node.(type) {
1069 p.useNodeComments = true
1070 p.expr(n, ignoreMultiLine)
1073 p.useNodeComments = true
1074 // A labeled statement will un-indent to position the
1075 // label. Set indent to 1 so we don't get indent "underflow".
1076 if _, labeledStmt := n.(*ast.LabeledStmt); labeledStmt {
1079 p.stmt(n, false, ignoreMultiLine)
1082 p.useNodeComments = true
1083 p.decl(n, ignoreMultiLine)
1086 p.useNodeComments = true
1087 p.spec(n, 1, false, ignoreMultiLine)
1090 p.comments = n.Comments
1091 p.useNodeComments = n.Comments == nil
1094 p.errors <- fmt.Errorf("printer.Fprint: unsupported node type %T", n)
1097 p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF)
1098 p.errors <- nil // no errors
1100 err := <-p.errors // wait for completion of goroutine
1102 // flush tabwriter, if any
1104 tw.Flush() // ignore errors
1107 return p.written, err
1111 // Fprint "pretty-prints" an AST node to output.
1112 // It calls Config.Fprint with default settings.
1114 func Fprint(output io.Writer, node interface{}) os.Error {
1115 _, err := (&Config{Tabwidth: 8}).Fprint(output, node) // don't care about number of bytes written