]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgo/go/go/printer/printer.go
Add Go frontend, libgo library, and Go testsuite.
[thirdparty/gcc.git] / libgo / go / go / printer / printer.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // The printer package implements printing of AST nodes.
6 package printer
7
8 import (
9 "bytes"
10 "fmt"
11 "go/ast"
12 "go/token"
13 "io"
14 "os"
15 "path"
16 "runtime"
17 "tabwriter"
18 )
19
20
// debug turns on internal error reporting (see internalError) and
// position annotations in the output (see writeItem).
const debug = false // enable for debugging

// whiteSpace is the element type of the printer's pending whitespace buffer.
type whiteSpace int

// Whitespace values. ignore entries are skipped when the buffer is
// written, indent and unindent adjust the indentation level, and the
// remaining values are written to the output as the corresponding byte.
const (
	ignore   whiteSpace = 0
	blank    whiteSpace = ' '
	vtab     whiteSpace = '\v'
	newline  whiteSpace = '\n'
	formfeed whiteSpace = '\f'
	indent   whiteSpace = '>'
	unindent whiteSpace = '<'
)
35
36
// Byte sequences shared by the writing functions.
var (
	esc       = []byte{tabwriter.Escape}
	htab      = []byte("\t")
	htabs     = []byte("\t\t\t\t\t\t\t\t")
	newlines  = []byte("\n\n\n\n\n\n\n\n") // more than the max determined by nlines
	formfeeds = []byte("\f\f\f\f\f\f\f\f") // more than the max determined by nlines
)

// HTML replacements for the characters escaped by write in GenHTML mode.
var (
	esc_quot = []byte("&#34;") // shorter than "&quot;"
	esc_apos = []byte("&#39;") // shorter than "&apos;"
	esc_amp  = []byte("&amp;")
	esc_lt   = []byte("&lt;")
	esc_gt   = []byte("&gt;")
)
50
51
var (
	// noPos is used when a position is needed but not known.
	noPos token.Position

	// infinity serves as the offset and line of the position passed to
	// the final flush in Fprint.
	infinity = 1 << 30

	// ignoreMultiLine is passed where the multiLine information is not
	// important.
	ignoreMultiLine = new(bool)
)
59
60
61 type printer struct {
62 // Configuration (does not change after initialization)
63 output io.Writer
64 Config
65 errors chan os.Error
66
67 // Current state
68 nesting int // nesting level (0: top-level (package scope), >0: functions/decls.)
69 written int // number of bytes written
70 indent int // current indentation
71 escape bool // true if in escape sequence
72 lastTok token.Token // the last token printed (token.ILLEGAL if it's whitespace)
73
74 // Buffered whitespace
75 buffer []whiteSpace
76
77 // The (possibly estimated) position in the generated output;
78 // in AST space (i.e., pos is set whenever a token position is
79 // known accurately, and updated dependending on what has been
80 // written).
81 pos token.Position
82
83 // The value of pos immediately after the last item has been
84 // written using writeItem.
85 last token.Position
86
87 // HTML support
88 lastTaggedLine int // last line for which a line tag was written
89
90 // The list of all source comments, in order of appearance.
91 comments []*ast.CommentGroup // may be nil
92 cindex int // current comment index
93 useNodeComments bool // if not set, ignore lead and line comments of nodes
94 }
95
96
97 func (p *printer) init(output io.Writer, cfg *Config) {
98 p.output = output
99 p.Config = *cfg
100 p.errors = make(chan os.Error)
101 p.buffer = make([]whiteSpace, 0, 16) // whitespace sequences are short
102 }
103
104
105 func (p *printer) internalError(msg ...interface{}) {
106 if debug {
107 fmt.Print(p.pos.String() + ": ")
108 fmt.Println(msg...)
109 panic("go/printer")
110 }
111 }
112
113
114 // nlines returns the adjusted number of linebreaks given the desired number
115 // of breaks n such that min <= result <= max where max depends on the current
116 // nesting level.
117 //
118 func (p *printer) nlines(n, min int) int {
119 if n < min {
120 return min
121 }
122 max := 3 // max. number of newlines at the top level (p.nesting == 0)
123 if p.nesting > 0 {
124 max = 2 // max. number of newlines everywhere else
125 }
126 if n > max {
127 return max
128 }
129 return n
130 }
131
132
133 // write0 writes raw (uninterpreted) data to p.output and handles errors.
134 // write0 does not indent after newlines, and does not HTML-escape or update p.pos.
135 //
136 func (p *printer) write0(data []byte) {
137 n, err := p.output.Write(data)
138 p.written += n
139 if err != nil {
140 p.errors <- err
141 runtime.Goexit()
142 }
143 }
144
145
146 // write interprets data and writes it to p.output. It inserts indentation
147 // after a line break unless in a tabwriter escape sequence, and it HTML-
148 // escapes characters if GenHTML is set. It updates p.pos as a side-effect.
149 //
150 func (p *printer) write(data []byte) {
151 i0 := 0
152 for i, b := range data {
153 switch b {
154 case '\n', '\f':
155 // write segment ending in b
156 p.write0(data[i0 : i+1])
157
158 // update p.pos
159 p.pos.Offset += i + 1 - i0
160 p.pos.Line++
161 p.pos.Column = 1
162
163 if !p.escape {
164 // write indentation
165 // use "hard" htabs - indentation columns
166 // must not be discarded by the tabwriter
167 j := p.indent
168 for ; j > len(htabs); j -= len(htabs) {
169 p.write0(htabs)
170 }
171 p.write0(htabs[0:j])
172
173 // update p.pos
174 p.pos.Offset += p.indent
175 p.pos.Column += p.indent
176 }
177
178 // next segment start
179 i0 = i + 1
180
181 case '"', '\'', '&', '<', '>':
182 if p.Mode&GenHTML != 0 {
183 // write segment ending in b
184 p.write0(data[i0:i])
185
186 // write HTML-escaped b
187 var esc []byte
188 switch b {
189 case '"':
190 esc = esc_quot
191 case '\'':
192 esc = esc_apos
193 case '&':
194 esc = esc_amp
195 case '<':
196 esc = esc_lt
197 case '>':
198 esc = esc_gt
199 }
200 p.write0(esc)
201
202 // update p.pos
203 d := i + 1 - i0
204 p.pos.Offset += d
205 p.pos.Column += d
206
207 // next segment start
208 i0 = i + 1
209 }
210
211 case tabwriter.Escape:
212 p.escape = !p.escape
213
214 // ignore escape chars introduced by printer - they are
215 // invisible and must not affect p.pos (was issue #1089)
216 p.pos.Offset--
217 p.pos.Column--
218 }
219 }
220
221 // write remaining segment
222 p.write0(data[i0:])
223
224 // update p.pos
225 d := len(data) - i0
226 p.pos.Offset += d
227 p.pos.Column += d
228 }
229
230
231 func (p *printer) writeNewlines(n int, useFF bool) {
232 if n > 0 {
233 n = p.nlines(n, 0)
234 if useFF {
235 p.write(formfeeds[0:n])
236 } else {
237 p.write(newlines[0:n])
238 }
239 }
240 }
241
242
243 func (p *printer) writeTaggedItem(data []byte, tag HTMLTag) {
244 // write start tag, if any
245 // (no html-escaping and no p.pos update for tags - use write0)
246 if tag.Start != "" {
247 p.write0([]byte(tag.Start))
248 }
249 p.write(data)
250 // write end tag, if any
251 if tag.End != "" {
252 p.write0([]byte(tag.End))
253 }
254 }
255
256
257 // writeItem writes data at position pos. data is the text corresponding to
258 // a single lexical token, but may also be comment text. pos is the actual
259 // (or at least very accurately estimated) position of the data in the original
260 // source text. If tags are present and GenHTML is set, the tags are written
261 // before and after the data. writeItem updates p.last to the position
262 // immediately following the data.
263 //
264 func (p *printer) writeItem(pos token.Position, data []byte, tag HTMLTag) {
265 fileChanged := false
266 if pos.IsValid() {
267 // continue with previous position if we don't have a valid pos
268 if p.last.IsValid() && p.last.Filename != pos.Filename {
269 // the file has changed - reset state
270 // (used when printing merged ASTs of different files
271 // e.g., the result of ast.MergePackageFiles)
272 p.indent = 0
273 p.escape = false
274 p.buffer = p.buffer[0:0]
275 fileChanged = true
276 }
277 p.pos = pos
278 }
279 if debug {
280 // do not update p.pos - use write0
281 _, filename := path.Split(pos.Filename)
282 p.write0([]byte(fmt.Sprintf("[%s:%d:%d]", filename, pos.Line, pos.Column)))
283 }
284 if p.Mode&GenHTML != 0 {
285 // write line tag if on a new line
286 // TODO(gri): should write line tags on each line at the start
287 // will be more useful (e.g. to show line numbers)
288 if p.Styler != nil && (pos.Line != p.lastTaggedLine || fileChanged) {
289 p.writeTaggedItem(p.Styler.LineTag(pos.Line))
290 p.lastTaggedLine = pos.Line
291 }
292 p.writeTaggedItem(data, tag)
293 } else {
294 p.write(data)
295 }
296 p.last = p.pos
297 }
298
299
300 // writeCommentPrefix writes the whitespace before a comment.
301 // If there is any pending whitespace, it consumes as much of
302 // it as is likely to help position the comment nicely.
303 // pos is the comment position, next the position of the item
304 // after all pending comments, isFirst indicates if this is the
305 // first comment in a group of comments, and isKeyword indicates
306 // if the next item is a keyword.
307 //
308 func (p *printer) writeCommentPrefix(pos, next token.Position, isFirst, isKeyword bool) {
309 if !p.last.IsValid() {
310 // there was no preceeding item and the comment is the
311 // first item to be printed - don't write any whitespace
312 return
313 }
314
315 if pos.IsValid() && pos.Filename != p.last.Filename {
316 // comment in a different file - separate with newlines (writeNewlines will limit the number)
317 p.writeNewlines(10, true)
318 return
319 }
320
321 if pos.IsValid() && pos.Line == p.last.Line {
322 // comment on the same line as last item:
323 // separate with at least one separator
324 hasSep := false
325 if isFirst {
326 j := 0
327 for i, ch := range p.buffer {
328 switch ch {
329 case blank:
330 // ignore any blanks before a comment
331 p.buffer[i] = ignore
332 continue
333 case vtab:
334 // respect existing tabs - important
335 // for proper formatting of commented structs
336 hasSep = true
337 continue
338 case indent:
339 // apply pending indentation
340 continue
341 }
342 j = i
343 break
344 }
345 p.writeWhitespace(j)
346 }
347 // make sure there is at least one separator
348 if !hasSep {
349 if pos.Line == next.Line {
350 // next item is on the same line as the comment
351 // (which must be a /*-style comment): separate
352 // with a blank instead of a tab
353 p.write([]byte{' '})
354 } else {
355 p.write(htab)
356 }
357 }
358
359 } else {
360 // comment on a different line:
361 // separate with at least one line break
362 if isFirst {
363 j := 0
364 for i, ch := range p.buffer {
365 switch ch {
366 case blank, vtab:
367 // ignore any horizontal whitespace before line breaks
368 p.buffer[i] = ignore
369 continue
370 case indent:
371 // apply pending indentation
372 continue
373 case unindent:
374 // if the next token is a keyword, apply the outdent
375 // if it appears that the comment is aligned with the
376 // keyword; otherwise assume the outdent is part of a
377 // closing block and stop (this scenario appears with
378 // comments before a case label where the comments
379 // apply to the next case instead of the current one)
380 if isKeyword && pos.Column == next.Column {
381 continue
382 }
383 case newline, formfeed:
384 // TODO(gri): may want to keep formfeed info in some cases
385 p.buffer[i] = ignore
386 }
387 j = i
388 break
389 }
390 p.writeWhitespace(j)
391 }
392 // use formfeeds to break columns before a comment;
393 // this is analogous to using formfeeds to separate
394 // individual lines of /*-style comments
395 // (if !pos.IsValid(), pos.Line == 0, and this will
396 // print no newlines)
397 p.writeNewlines(pos.Line-p.last.Line, true)
398 }
399 }
400
401
402 func (p *printer) writeCommentLine(comment *ast.Comment, pos token.Position, line []byte) {
403 // line must pass through unchanged, bracket it with tabwriter.Escape
404 line = bytes.Join([][]byte{esc, line, esc}, nil)
405
406 // apply styler, if any
407 var tag HTMLTag
408 if p.Styler != nil {
409 line, tag = p.Styler.Comment(comment, line)
410 }
411
412 p.writeItem(pos, line, tag)
413 }
414
415
416 // TODO(gri): Similar (but not quite identical) functionality for
417 // comment processing can be found in go/doc/comment.go.
418 // Perhaps this can be factored eventually.
419
// split breaks comment text into its lines. The newline characters are
// not part of the result; the returned slices share memory with text.
func split(text []byte) [][]byte {
	// there is one more line than there are newlines
	// (comment text never ends in a newline)
	count := 1
	for i := 0; i < len(text); i++ {
		if text[i] == '\n' {
			count++
		}
	}

	lines := make([][]byte, count)
	k, start := 0, 0
	for i := 0; i < len(text); i++ {
		if text[i] != '\n' {
			continue
		}
		lines[k] = text[start:i] // exclude newline
		start = i + 1            // discard newline
		k++
	}
	lines[k] = text[start:]

	return lines
}
445
446
// isBlank reports whether s contains no byte greater than ' ';
// an empty s is blank.
func isBlank(s []byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] > ' ' {
			return false
		}
	}
	return true
}
455
456
// commonPrefix returns the longest prefix of a that is also a prefix of
// b and consists only of bytes <= ' ' and '*' characters. The result
// shares memory with a.
func commonPrefix(a, b []byte) []byte {
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	i := 0
	for ; i < limit; i++ {
		c := a[i]
		if c != b[i] || (c > ' ' && c != '*') {
			break
		}
	}
	return a[0:i]
}
464
465
466 func stripCommonPrefix(lines [][]byte) {
467 if len(lines) < 2 {
468 return // at most one line - nothing to do
469 }
470 // len(lines) >= 2
471
472 // The heuristic in this function tries to handle a few
473 // common patterns of /*-style comments: Comments where
474 // the opening /* and closing */ are aligned and the
475 // rest of the comment text is aligned and indented with
476 // blanks or tabs, cases with a vertical "line of stars"
477 // on the left, and cases where the closing */ is on the
478 // same line as the last comment text.
479
480 // Compute maximum common white prefix of all but the first,
481 // last, and blank lines, and replace blank lines with empty
482 // lines (the first line starts with /* and has no prefix).
483 // In case of two-line comments, consider the last line for
484 // the prefix computation since otherwise the prefix would
485 // be empty.
486 //
487 // Note that the first and last line are never empty (they
488 // contain the opening /* and closing */ respectively) and
489 // thus they can be ignored by the blank line check.
490 var prefix []byte
491 if len(lines) > 2 {
492 for i, line := range lines[1 : len(lines)-1] {
493 switch {
494 case isBlank(line):
495 lines[1+i] = nil // range starts at line 1
496 case prefix == nil:
497 prefix = commonPrefix(line, line)
498 default:
499 prefix = commonPrefix(prefix, line)
500 }
501 }
502 } else { // len(lines) == 2
503 line := lines[1]
504 prefix = commonPrefix(line, line)
505 }
506
507 /*
508 * Check for vertical "line of stars" and correct prefix accordingly.
509 */
510 lineOfStars := false
511 if i := bytes.Index(prefix, []byte{'*'}); i >= 0 {
512 // Line of stars present.
513 if i > 0 && prefix[i-1] == ' ' {
514 i-- // remove trailing blank from prefix so stars remain aligned
515 }
516 prefix = prefix[0:i]
517 lineOfStars = true
518 } else {
519 // No line of stars present.
520 // Determine the white space on the first line after the /*
521 // and before the beginning of the comment text, assume two
522 // blanks instead of the /* unless the first character after
523 // the /* is a tab. If the first comment line is empty but
524 // for the opening /*, assume up to 3 blanks or a tab. This
525 // whitespace may be found as suffix in the common prefix.
526 first := lines[0]
527 if isBlank(first[2:]) {
528 // no comment text on the first line:
529 // reduce prefix by up to 3 blanks or a tab
530 // if present - this keeps comment text indented
531 // relative to the /* and */'s if it was indented
532 // in the first place
533 i := len(prefix)
534 for n := 0; n < 3 && i > 0 && prefix[i-1] == ' '; n++ {
535 i--
536 }
537 if i == len(prefix) && i > 0 && prefix[i-1] == '\t' {
538 i--
539 }
540 prefix = prefix[0:i]
541 } else {
542 // comment text on the first line
543 suffix := make([]byte, len(first))
544 n := 2 // start after opening /*
545 for n < len(first) && first[n] <= ' ' {
546 suffix[n] = first[n]
547 n++
548 }
549 if n > 2 && suffix[2] == '\t' {
550 // assume the '\t' compensates for the /*
551 suffix = suffix[2:n]
552 } else {
553 // otherwise assume two blanks
554 suffix[0], suffix[1] = ' ', ' '
555 suffix = suffix[0:n]
556 }
557 // Shorten the computed common prefix by the length of
558 // suffix, if it is found as suffix of the prefix.
559 if bytes.HasSuffix(prefix, suffix) {
560 prefix = prefix[0 : len(prefix)-len(suffix)]
561 }
562 }
563 }
564
565 // Handle last line: If it only contains a closing */, align it
566 // with the opening /*, otherwise align the text with the other
567 // lines.
568 last := lines[len(lines)-1]
569 closing := []byte("*/")
570 i := bytes.Index(last, closing)
571 if isBlank(last[0:i]) {
572 // last line only contains closing */
573 var sep []byte
574 if lineOfStars {
575 // insert an aligning blank
576 sep = []byte{' '}
577 }
578 lines[len(lines)-1] = bytes.Join([][]byte{prefix, closing}, sep)
579 } else {
580 // last line contains more comment text - assume
581 // it is aligned like the other lines
582 prefix = commonPrefix(prefix, last)
583 }
584
585 // Remove the common prefix from all but the first and empty lines.
586 for i, line := range lines[1:] {
587 if len(line) != 0 {
588 lines[1+i] = line[len(prefix):] // range starts at line 1
589 }
590 }
591 }
592
593
594 func (p *printer) writeComment(comment *ast.Comment) {
595 text := comment.Text
596
597 // shortcut common case of //-style comments
598 if text[1] == '/' {
599 p.writeCommentLine(comment, comment.Pos(), text)
600 return
601 }
602
603 // for /*-style comments, print line by line and let the
604 // write function take care of the proper indentation
605 lines := split(text)
606 stripCommonPrefix(lines)
607
608 // write comment lines, separated by formfeed,
609 // without a line break after the last line
610 linebreak := formfeeds[0:1]
611 pos := comment.Pos()
612 for i, line := range lines {
613 if i > 0 {
614 p.write(linebreak)
615 pos = p.pos
616 }
617 if len(line) > 0 {
618 p.writeCommentLine(comment, pos, line)
619 }
620 }
621 }
622
623
624 // writeCommentSuffix writes a line break after a comment if indicated
625 // and processes any leftover indentation information. If a line break
626 // is needed, the kind of break (newline vs formfeed) depends on the
627 // pending whitespace. writeCommentSuffix returns true if a pending
628 // formfeed was dropped from the whitespace buffer.
629 //
630 func (p *printer) writeCommentSuffix(needsLinebreak bool) (droppedFF bool) {
631 for i, ch := range p.buffer {
632 switch ch {
633 case blank, vtab:
634 // ignore trailing whitespace
635 p.buffer[i] = ignore
636 case indent, unindent:
637 // don't loose indentation information
638 case newline, formfeed:
639 // if we need a line break, keep exactly one
640 // but remember if we dropped any formfeeds
641 if needsLinebreak {
642 needsLinebreak = false
643 } else {
644 if ch == formfeed {
645 droppedFF = true
646 }
647 p.buffer[i] = ignore
648 }
649 }
650 }
651 p.writeWhitespace(len(p.buffer))
652
653 // make sure we have a line break
654 if needsLinebreak {
655 p.write([]byte{'\n'})
656 }
657
658 return
659 }
660
661
662 // intersperseComments consumes all comments that appear before the next token
663 // tok and prints it together with the buffered whitespace (i.e., the whitespace
664 // that needs to be written before the next token). A heuristic is used to mix
665 // the comments and whitespace. intersperseComments returns true if a pending
666 // formfeed was dropped from the whitespace buffer.
667 //
668 func (p *printer) intersperseComments(next token.Position, tok token.Token) (droppedFF bool) {
669 var last *ast.Comment
670 for ; p.commentBefore(next); p.cindex++ {
671 for _, c := range p.comments[p.cindex].List {
672 p.writeCommentPrefix(c.Pos(), next, last == nil, tok.IsKeyword())
673 p.writeComment(c)
674 last = c
675 }
676 }
677
678 if last != nil {
679 if last.Text[1] == '*' && last.Pos().Line == next.Line {
680 // the last comment is a /*-style comment and the next item
681 // follows on the same line: separate with an extra blank
682 p.write([]byte{' '})
683 }
684 // ensure that there is a newline after a //-style comment
685 // or if we are before a closing '}' or at the end of a file
686 return p.writeCommentSuffix(last.Text[1] == '/' || tok == token.RBRACE || tok == token.EOF)
687 }
688
689 // no comment was written - we should never reach here since
690 // intersperseComments should not be called in that case
691 p.internalError("intersperseComments called without pending comments")
692 return false
693 }
694
695
696 // whiteWhitespace writes the first n whitespace entries.
697 func (p *printer) writeWhitespace(n int) {
698 // write entries
699 var data [1]byte
700 for i := 0; i < n; i++ {
701 switch ch := p.buffer[i]; ch {
702 case ignore:
703 // ignore!
704 case indent:
705 p.indent++
706 case unindent:
707 p.indent--
708 if p.indent < 0 {
709 p.internalError("negative indentation:", p.indent)
710 p.indent = 0
711 }
712 case newline, formfeed:
713 // A line break immediately followed by a "correcting"
714 // unindent is swapped with the unindent - this permits
715 // proper label positioning. If a comment is between
716 // the line break and the label, the unindent is not
717 // part of the comment whitespace prefix and the comment
718 // will be positioned correctly indented.
719 if i+1 < n && p.buffer[i+1] == unindent {
720 // Use a formfeed to terminate the current section.
721 // Otherwise, a long label name on the next line leading
722 // to a wide column may increase the indentation column
723 // of lines before the label; effectively leading to wrong
724 // indentation.
725 p.buffer[i], p.buffer[i+1] = unindent, formfeed
726 i-- // do it again
727 continue
728 }
729 fallthrough
730 default:
731 data[0] = byte(ch)
732 p.write(data[0:])
733 }
734 }
735
736 // shift remaining entries down
737 i := 0
738 for ; n < len(p.buffer); n++ {
739 p.buffer[i] = p.buffer[n]
740 i++
741 }
742 p.buffer = p.buffer[0:i]
743 }
744
745
746 // ----------------------------------------------------------------------------
747 // Printing interface
748
749
// mayCombine reports whether the token prev, immediately followed by
// text starting with the byte next, would read as a different token
// sequence.
func mayCombine(prev token.Token, next byte) (b bool) {
	switch prev {
	case token.INT:
		return next == '.' // 1.
	case token.ADD:
		return next == '+' // ++
	case token.SUB:
		return next == '-' // --
	case token.QUO:
		return next == '*' // /*
	case token.LSS:
		return next == '-' || next == '<' // <- or <<
	case token.AND:
		return next == '&' || next == '^' // && or &^
	}
	return false
}
767
768
769 // print prints a list of "items" (roughly corresponding to syntactic
770 // tokens, but also including whitespace and formatting information).
771 // It is the only print function that should be called directly from
772 // any of the AST printing functions in nodes.go.
773 //
774 // Whitespace is accumulated until a non-whitespace token appears. Any
775 // comments that need to appear before that token are printed first,
776 // taking into account the amount and structure of any pending white-
777 // space for best comment placement. Then, any leftover whitespace is
778 // printed, followed by the actual token.
779 //
780 func (p *printer) print(args ...interface{}) {
781 for _, f := range args {
782 next := p.pos // estimated position of next item
783 var data []byte
784 var tag HTMLTag
785 var tok token.Token
786
787 switch x := f.(type) {
788 case whiteSpace:
789 if x == ignore {
790 // don't add ignore's to the buffer; they
791 // may screw up "correcting" unindents (see
792 // LabeledStmt)
793 break
794 }
795 i := len(p.buffer)
796 if i == cap(p.buffer) {
797 // Whitespace sequences are very short so this should
798 // never happen. Handle gracefully (but possibly with
799 // bad comment placement) if it does happen.
800 p.writeWhitespace(i)
801 i = 0
802 }
803 p.buffer = p.buffer[0 : i+1]
804 p.buffer[i] = x
805 case *ast.Ident:
806 if p.Styler != nil {
807 data, tag = p.Styler.Ident(x)
808 } else {
809 data = []byte(x.Name)
810 }
811 tok = token.IDENT
812 case *ast.BasicLit:
813 if p.Styler != nil {
814 data, tag = p.Styler.BasicLit(x)
815 } else {
816 data = x.Value
817 }
818 // escape all literals so they pass through unchanged
819 // (note that valid Go programs cannot contain esc ('\xff')
820 // bytes since they do not appear in legal UTF-8 sequences)
821 // TODO(gri): do this more efficiently.
822 data = []byte("\xff" + string(data) + "\xff")
823 tok = x.Kind
824 case token.Token:
825 s := x.String()
826 if mayCombine(p.lastTok, s[0]) {
827 // the previous and the current token must be
828 // separated by a blank otherwise they combine
829 // into a different incorrect token sequence
830 // (except for token.INT followed by a '.' this
831 // should never happen because it is taken care
832 // of via binary expression formatting)
833 if len(p.buffer) != 0 {
834 p.internalError("whitespace buffer not empty")
835 }
836 p.buffer = p.buffer[0:1]
837 p.buffer[0] = ' '
838 }
839 if p.Styler != nil {
840 data, tag = p.Styler.Token(x)
841 } else {
842 data = []byte(s)
843 }
844 tok = x
845 case token.Position:
846 if x.IsValid() {
847 next = x // accurate position of next item
848 }
849 tok = p.lastTok
850 default:
851 fmt.Fprintf(os.Stderr, "print: unsupported argument type %T\n", f)
852 panic("go/printer type")
853 }
854 p.lastTok = tok
855 p.pos = next
856
857 if data != nil {
858 droppedFF := p.flush(next, tok)
859
860 // intersperse extra newlines if present in the source
861 // (don't do this in flush as it will cause extra newlines
862 // at the end of a file) - use formfeeds if we dropped one
863 // before
864 p.writeNewlines(next.Line-p.pos.Line, droppedFF)
865
866 p.writeItem(next, data, tag)
867 }
868 }
869 }
870
871
872 // commentBefore returns true iff the current comment occurs
873 // before the next position in the source code.
874 //
875 func (p *printer) commentBefore(next token.Position) bool {
876 return p.cindex < len(p.comments) && p.comments[p.cindex].List[0].Pos().Offset < next.Offset
877 }
878
879
880 // Flush prints any pending comments and whitespace occuring
881 // textually before the position of the next token tok. Flush
882 // returns true if a pending formfeed character was dropped
883 // from the whitespace buffer as a result of interspersing
884 // comments.
885 //
886 func (p *printer) flush(next token.Position, tok token.Token) (droppedFF bool) {
887 if p.commentBefore(next) {
888 // if there are comments before the next item, intersperse them
889 droppedFF = p.intersperseComments(next, tok)
890 } else {
891 // otherwise, write any leftover whitespace
892 p.writeWhitespace(len(p.buffer))
893 }
894 return
895 }
896
897
898 // ----------------------------------------------------------------------------
899 // Trimmer
900
// A trimmer is an io.Writer filter. It strips tabwriter.Escape
// characters as well as trailing blanks and tabs, and it converts
// formfeed and vtab characters into newlines and htabs (in case no
// tabwriter is used). Text bracketed by tabwriter.Escape characters
// is passed through unchanged.
//
type trimmer struct {
	output io.Writer    // destination of the filtered text
	space  bytes.Buffer // blanks and tabs seen but not yet written
	state  int          // one of inSpace, inEscape, inText
}
912
913
// The trimmer is a state machine; these are its states.
const (
	inSpace  = iota // collecting blanks and tabs
	inEscape        // between two tabwriter.Escape characters
	inText          // inside other text
)
921
922
923 // Design note: It is tempting to eliminate extra blanks occuring in
924 // whitespace in this function as it could simplify some
925 // of the blanks logic in the node printing functions.
926 // However, this would mess up any formatting done by
927 // the tabwriter.
928
929 func (p *trimmer) Write(data []byte) (n int, err os.Error) {
930 m := 0 // if p.state != inSpace, data[m:n] is unwritten
931 var b byte
932 for n, b = range data {
933 if b == '\v' {
934 b = '\t' // convert to htab
935 }
936 switch p.state {
937 case inSpace:
938 switch b {
939 case '\t', ' ':
940 p.space.WriteByte(b) // WriteByte returns no errors
941 case '\f', '\n':
942 p.space.Reset() // discard trailing space
943 _, err = p.output.Write(newlines[0:1]) // write newline
944 case tabwriter.Escape:
945 _, err = p.output.Write(p.space.Bytes())
946 p.space.Reset()
947 p.state = inEscape
948 m = n + 1 // drop tabwriter.Escape
949 default:
950 _, err = p.output.Write(p.space.Bytes())
951 p.space.Reset()
952 p.state = inText
953 m = n
954 }
955 case inEscape:
956 if b == tabwriter.Escape {
957 _, err = p.output.Write(data[m:n])
958 p.state = inSpace
959 }
960 case inText:
961 switch b {
962 case '\t', ' ':
963 _, err = p.output.Write(data[m:n])
964 p.state = inSpace
965 p.space.WriteByte(b) // WriteByte returns no errors
966 case '\f':
967 data[n] = '\n' // convert to newline
968 case tabwriter.Escape:
969 _, err = p.output.Write(data[m:n])
970 p.state = inEscape
971 m = n + 1 // drop tabwriter.Escape
972 }
973 }
974 if err != nil {
975 return
976 }
977 }
978 n = len(data)
979
980 if p.state != inSpace {
981 _, err = p.output.Write(data[m:n])
982 p.state = inSpace
983 }
984
985 return
986 }
987
988
989 // ----------------------------------------------------------------------------
990 // Public interface
991
// Flags for Config.Mode; they control general printing.
const (
	// GenHTML requests HTML output.
	GenHTML uint = 1 << iota
	// RawFormat disables the tabwriter; if set, UseSpaces is ignored.
	RawFormat
	// TabIndent uses tabs for indentation independent of UseSpaces.
	TabIndent
	// UseSpaces uses spaces instead of tabs for alignment.
	UseSpaces
)
999
1000
// An HTMLTag is a pair of strings written before and after an item.
type HTMLTag struct {
	Start string // start tag; empty if absent
	End   string // end tag; empty if absent
}
1005
1006
1007 // A Styler specifies formatting of line tags and elementary Go words.
1008 // A format consists of text and a (possibly empty) surrounding HTML tag.
1009 //
1010 type Styler interface {
1011 LineTag(line int) ([]byte, HTMLTag)
1012 Comment(c *ast.Comment, line []byte) ([]byte, HTMLTag)
1013 BasicLit(x *ast.BasicLit) ([]byte, HTMLTag)
1014 Ident(id *ast.Ident) ([]byte, HTMLTag)
1015 Token(tok token.Token) ([]byte, HTMLTag)
1016 }
1017
1018
1019 // A Config node controls the output of Fprint.
1020 type Config struct {
1021 Mode uint // default: 0
1022 Tabwidth int // default: 8
1023 Styler Styler // default: nil
1024 }
1025
1026
1027 // Fprint "pretty-prints" an AST node to output and returns the number
1028 // of bytes written and an error (if any) for a given configuration cfg.
1029 // The node type must be *ast.File, or assignment-compatible to ast.Expr,
1030 // ast.Decl, ast.Spec, or ast.Stmt.
1031 //
1032 func (cfg *Config) Fprint(output io.Writer, node interface{}) (int, os.Error) {
1033 // redirect output through a trimmer to eliminate trailing whitespace
1034 // (Input to a tabwriter must be untrimmed since trailing tabs provide
1035 // formatting information. The tabwriter could provide trimming
1036 // functionality but no tabwriter is used when RawFormat is set.)
1037 output = &trimmer{output: output}
1038
1039 // setup tabwriter if needed and redirect output
1040 var tw *tabwriter.Writer
1041 if cfg.Mode&RawFormat == 0 {
1042 minwidth := cfg.Tabwidth
1043
1044 padchar := byte('\t')
1045 if cfg.Mode&UseSpaces != 0 {
1046 padchar = ' '
1047 }
1048
1049 twmode := tabwriter.DiscardEmptyColumns
1050 if cfg.Mode&GenHTML != 0 {
1051 twmode |= tabwriter.FilterHTML
1052 }
1053 if cfg.Mode&TabIndent != 0 {
1054 minwidth = 0
1055 twmode |= tabwriter.TabIndent
1056 }
1057
1058 tw = tabwriter.NewWriter(output, minwidth, cfg.Tabwidth, 1, padchar, twmode)
1059 output = tw
1060 }
1061
1062 // setup printer and print node
1063 var p printer
1064 p.init(output, cfg)
1065 go func() {
1066 switch n := node.(type) {
1067 case ast.Expr:
1068 p.nesting = 1
1069 p.useNodeComments = true
1070 p.expr(n, ignoreMultiLine)
1071 case ast.Stmt:
1072 p.nesting = 1
1073 p.useNodeComments = true
1074 // A labeled statement will un-indent to position the
1075 // label. Set indent to 1 so we don't get indent "underflow".
1076 if _, labeledStmt := n.(*ast.LabeledStmt); labeledStmt {
1077 p.indent = 1
1078 }
1079 p.stmt(n, false, ignoreMultiLine)
1080 case ast.Decl:
1081 p.nesting = 1
1082 p.useNodeComments = true
1083 p.decl(n, ignoreMultiLine)
1084 case ast.Spec:
1085 p.nesting = 1
1086 p.useNodeComments = true
1087 p.spec(n, 1, false, ignoreMultiLine)
1088 case *ast.File:
1089 p.nesting = 0
1090 p.comments = n.Comments
1091 p.useNodeComments = n.Comments == nil
1092 p.file(n)
1093 default:
1094 p.errors <- fmt.Errorf("printer.Fprint: unsupported node type %T", n)
1095 runtime.Goexit()
1096 }
1097 p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF)
1098 p.errors <- nil // no errors
1099 }()
1100 err := <-p.errors // wait for completion of goroutine
1101
1102 // flush tabwriter, if any
1103 if tw != nil {
1104 tw.Flush() // ignore errors
1105 }
1106
1107 return p.written, err
1108 }
1109
1110
1111 // Fprint "pretty-prints" an AST node to output.
1112 // It calls Config.Fprint with default settings.
1113 //
1114 func Fprint(output io.Writer, node interface{}) os.Error {
1115 _, err := (&Config{Tabwidth: 8}).Fprint(output, node) // don't care about number of bytes written
1116 return err
1117 }