libgo/go/fmt/scan.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package fmt
   6
   7 import (
   8         "bytes"
   9         "io"
  10         "os"
  11         "reflect"
  12         "strconv"
  13         "strings"
  14         "unicode"
  15         "utf8"
  16 )
  17
// readRuner is the interface to something that can read runes.  If
// the reader handed to the scanner does not satisfy this interface,
// newScanState wraps it in a readRune value that does.
type readRuner interface {
        ReadRune() (rune int, size int, err os.Error)
}
  24
// unreadRuner is the interface to something that can unread runes.
// If the reader handed to the scanner does not satisfy this interface,
// UngetRune falls back to the scan state's own one-rune lookahead,
// whose contents are lost when Scan returns.
type unreadRuner interface {
        UnreadRune() os.Error
}
  32
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
        // GetRune reads the next rune (Unicode code point) from the input.
        GetRune() (rune int, err os.Error)
        // UngetRune pushes back the rune most recently returned by GetRune,
        // so that the next call to GetRune returns it again.
        UngetRune()
        // Width returns the value of the width option and whether it has been set.
        // The unit is Unicode code points.
        Width() (wid int, ok bool)
        // Token returns the next space-delimited token from the input,
        // skipping any leading white space.  If a width has been specified,
        // the returned token will be no longer than the width.
        Token() (token string, err os.Error)
}
  49
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful.  The Scan method is called
// for any argument to Scan or Scanln that implements it; it receives the
// scan state and the verb being processed, and a non-nil error it returns
// aborts the scan.
type Scanner interface {
        Scan(state ScanState, verb int) os.Error
}
  57
  58 // Scan scans text read from standard input, storing successive
  59 // space-separated values into successive arguments.  Newlines count
  60 // as space.  It returns the number of items successfully scanned.
  61 // If that is less than the number of arguments, err will report why.
  62 func Scan(a ...interface{}) (n int, err os.Error) {
  63         return Fscan(os.Stdin, a...)
  64 }
  65
  66 // Scanln is similar to Scan, but stops scanning at a newline and
  67 // after the final item there must be a newline or EOF.
  68 func Scanln(a ...interface{}) (n int, err os.Error) {
  69         return Fscanln(os.Stdin, a...)
  70 }
  71
  72 // Scanf scans text read from standard input, storing successive
  73 // space-separated values into successive arguments as determined by
  74 // the format.  It returns the number of items successfully scanned.
  75 func Scanf(format string, a ...interface{}) (n int, err os.Error) {
  76         return Fscanf(os.Stdin, format, a...)
  77 }
  78
  79 // Sscan scans the argument string, storing successive space-separated
  80 // values into successive arguments.  Newlines count as space.  It
  81 // returns the number of items successfully scanned.  If that is less
  82 // than the number of arguments, err will report why.
  83 func Sscan(str string, a ...interface{}) (n int, err os.Error) {
  84         return Fscan(strings.NewReader(str), a...)
  85 }
  86
  87 // Sscanln is similar to Sscan, but stops scanning at a newline and
  88 // after the final item there must be a newline or EOF.
  89 func Sscanln(str string, a ...interface{}) (n int, err os.Error) {
  90         return Fscanln(strings.NewReader(str), a...)
  91 }
  92
  93 // Sscanf scans the argument string, storing successive space-separated
  94 // values into successive arguments as determined by the format.  It
  95 // returns the number of items successfully parsed.
  96 func Sscanf(str string, format string, a ...interface{}) (n int, err os.Error) {
  97         return Fscanf(strings.NewReader(str), format, a...)
  98 }
  99
 100 // Fscan scans text read from r, storing successive space-separated
 101 // values into successive arguments.  Newlines count as space.  It
 102 // returns the number of items successfully scanned.  If that is less
 103 // than the number of arguments, err will report why.
 104 func Fscan(r io.Reader, a ...interface{}) (n int, err os.Error) {
 105         s := newScanState(r, true)
 106         n, err = s.doScan(a)
 107         s.free()
 108         return
 109 }
 110
 111 // Fscanln is similar to Fscan, but stops scanning at a newline and
 112 // after the final item there must be a newline or EOF.
 113 func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) {
 114         s := newScanState(r, false)
 115         n, err = s.doScan(a)
 116         s.free()
 117         return
 118 }
 119
 120 // Fscanf scans text read from r, storing successive space-separated
 121 // values into successive arguments as determined by the format.  It
 122 // returns the number of items successfully parsed.
 123 func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err os.Error) {
 124         s := newScanState(r, false)
 125         n, err = s.doScanf(format, a)
 126         s.free()
 127         return
 128 }
 129
// scanError represents an error generated by the scanning software.
// The error and errorString methods panic with a scanError value; the
// recovering code (Token and errorHandler) uses the type as a unique
// signature to tell scanner errors apart from unrelated panics, which
// it re-raises.
type scanError struct {
        err os.Error // the underlying error to report to the caller
}
 135
// EOF is the sentinel returned by the internal getRune method in place
// of a rune once the input is exhausted.
const EOF = -1
 137
// ss is the internal implementation of ScanState.  Instances are
// recycled through the ssFree channel, so newScanState re-initializes
// the per-scan fields on every use.
type ss struct {
        rr         readRuner    // where to read input
        buf        bytes.Buffer // token accumulator
        nlIsSpace  bool         // whether newline counts as white space
        peekRune   int          // one-rune lookahead; negative when empty
        prevRune   int          // last rune returned by GetRune
        atEOF      bool         // already read EOF
        maxWid     int          // max width of field, in runes
        widPresent bool         // width was specified
        wid        int          // width consumed so far; used in accept()
}
 150
 151 func (s *ss) GetRune() (rune int, err os.Error) {
 152         if s.peekRune >= 0 {
 153                 rune = s.peekRune
 154                 s.prevRune = rune
 155                 s.peekRune = -1
 156                 return
 157         }
 158         rune, _, err = s.rr.ReadRune()
 159         if err == nil {
 160                 s.prevRune = rune
 161         }
 162         return
 163 }
 164
 165 func (s *ss) Width() (wid int, ok bool) {
 166         return s.maxWid, s.widPresent
 167 }
 168
 169 // The public method returns an error; this private one panics.
 170 // If getRune reaches EOF, the return value is EOF (-1).
 171 func (s *ss) getRune() (rune int) {
 172         if s.atEOF {
 173                 return EOF
 174         }
 175         if s.peekRune >= 0 {
 176                 rune = s.peekRune
 177                 s.prevRune = rune
 178                 s.peekRune = -1
 179                 return
 180         }
 181         rune, _, err := s.rr.ReadRune()
 182         if err == nil {
 183                 s.prevRune = rune
 184         } else if err != nil {
 185                 if err == os.EOF {
 186                         s.atEOF = true
 187                         return EOF
 188                 }
 189                 s.error(err)
 190         }
 191         return
 192 }
 193
 194 // mustGetRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
 195 // It is called in cases such as string scanning where an EOF is a
 196 // syntax error.
 197 func (s *ss) mustGetRune() (rune int) {
 198         if s.atEOF {
 199                 s.error(io.ErrUnexpectedEOF)
 200         }
 201         if s.peekRune >= 0 {
 202                 rune = s.peekRune
 203                 s.peekRune = -1
 204                 return
 205         }
 206         rune, _, err := s.rr.ReadRune()
 207         if err != nil {
 208                 if err == os.EOF {
 209                         err = io.ErrUnexpectedEOF
 210                 }
 211                 s.error(err)
 212         }
 213         return
 214 }
 215
 216
 217 func (s *ss) UngetRune() {
 218         if u, ok := s.rr.(unreadRuner); ok {
 219                 u.UnreadRune()
 220         } else {
 221                 s.peekRune = s.prevRune
 222         }
 223 }
 224
 225 func (s *ss) error(err os.Error) {
 226         panic(scanError{err})
 227 }
 228
 229 func (s *ss) errorString(err string) {
 230         panic(scanError{os.ErrorString(err)})
 231 }
 232
 233 func (s *ss) Token() (tok string, err os.Error) {
 234         defer func() {
 235                 if e := recover(); e != nil {
 236                         if se, ok := e.(scanError); ok {
 237                                 err = se.err
 238                         } else {
 239                                 panic(e)
 240                         }
 241                 }
 242         }()
 243         tok = s.token()
 244         return
 245 }
 246
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader.  It is used if the Reader given to the scanner does
// not already implement readRuner.
type readRune struct {
        reader  io.Reader         // underlying source of bytes
        buf     [utf8.UTFMax]byte // used only inside ReadRune
        pending int               // number of bytes in pendBuf; only >0 for bad UTF-8
        pendBuf [utf8.UTFMax]byte // bytes left over
}
 256
 257 // readByte returns the next byte from the input, which may be
 258 // left over from a previous read if the UTF-8 was ill-formed.
 259 func (r *readRune) readByte() (b byte, err os.Error) {
 260         if r.pending > 0 {
 261                 b = r.pendBuf[0]
 262                 copy(r.pendBuf[0:], r.pendBuf[1:])
 263                 r.pending--
 264                 return
 265         }
 266         _, err = r.reader.Read(r.pendBuf[0:1])
 267         return r.pendBuf[0], err
 268 }
 269
 270 // unread saves the bytes for the next read.
 271 func (r *readRune) unread(buf []byte) {
 272         copy(r.pendBuf[r.pending:], buf)
 273         r.pending += len(buf)
 274 }
 275
 276 // ReadRune returns the next UTF-8 encoded code point from the
 277 // io.Reader inside r.
 278 func (r *readRune) ReadRune() (rune int, size int, err os.Error) {
 279         r.buf[0], err = r.readByte()
 280         if err != nil {
 281                 return 0, 0, err
 282         }
 283         if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
 284                 rune = int(r.buf[0])
 285                 return
 286         }
 287         var n int
 288         for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
 289                 r.buf[n], err = r.readByte()
 290                 if err != nil {
 291                         if err == os.EOF {
 292                                 err = nil
 293                                 break
 294                         }
 295                         return
 296                 }
 297         }
 298         rune, size = utf8.DecodeRune(r.buf[0:n])
 299         if size < n { // an error
 300                 r.unread(r.buf[size:n])
 301         }
 302         return
 303 }
 304
 305
// ssFree is a leaky bucket of reusable ss structures: free puts used
// structs into it and newScanState takes them out again.  It holds at
// most 100 of them.
var ssFree = make(chan *ss, 100)
 308
 309 // Allocate a new ss struct.  Probably can grab the previous one from ssFree.
 310 func newScanState(r io.Reader, nlIsSpace bool) *ss {
 311         s, ok := <-ssFree
 312         if !ok {
 313                 s = new(ss)
 314         }
 315         if rr, ok := r.(readRuner); ok {
 316                 s.rr = rr
 317         } else {
 318                 s.rr = &readRune{reader: r}
 319         }
 320         s.nlIsSpace = nlIsSpace
 321         s.peekRune = -1
 322         s.atEOF = false
 323         s.maxWid = 0
 324         s.widPresent = false
 325         return s
 326 }
 327
 328 // Save used ss structs in ssFree; avoid an allocation per invocation.
 329 func (s *ss) free() {
 330         // Don't hold on to ss structs with large buffers.
 331         if cap(s.buf.Bytes()) > 1024 {
 332                 return
 333         }
 334         s.buf.Reset()
 335         s.rr = nil
 336         _ = ssFree <- s
 337 }
 338
 339 // skipSpace skips spaces and maybe newlines.
 340 func (s *ss) skipSpace(stopAtNewline bool) {
 341         for {
 342                 rune := s.getRune()
 343                 if rune == EOF {
 344                         return
 345                 }
 346                 if rune == '\n' {
 347                         if stopAtNewline {
 348                                 break
 349                         }
 350                         if s.nlIsSpace {
 351                                 continue
 352                         }
 353                         s.errorString("unexpected newline")
 354                         return
 355                 }
 356                 if !unicode.IsSpace(rune) {
 357                         s.UngetRune()
 358                         break
 359                 }
 360         }
 361 }
 362
 363 // token returns the next space-delimited string from the input.  It
 364 // skips white space.  For Scanln, it stops at newlines.  For Scan,
 365 // newlines are treated as spaces.
 366 func (s *ss) token() string {
 367         s.skipSpace(false)
 368         // read until white space or newline
 369         for nrunes := 0; !s.widPresent || nrunes < s.maxWid; nrunes++ {
 370                 rune := s.getRune()
 371                 if rune == EOF {
 372                         break
 373                 }
 374                 if unicode.IsSpace(rune) {
 375                         s.UngetRune()
 376                         break
 377                 }
 378                 s.buf.WriteRune(rune)
 379         }
 380         return s.buf.String()
 381 }
 382
 383 // typeError indicates that the type of the operand did not match the format
 384 func (s *ss) typeError(field interface{}, expected string) {
 385         s.errorString("expected field of type pointer to " + expected + "; found " + reflect.Typeof(field).String())
 386 }
 387
// complexError is reported by complexTokens when the input does not
// have the shape of a complex number.
var complexError = os.ErrorString("syntax error scanning complex number")

// boolError is reported by scanBool when a word that starts like "true"
// or "false" is not spelled out completely.
var boolError = os.ErrorString("syntax error scanning boolean")
 390
 391 // accepts checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
 392 // buffer and returns true. Otherwise it return false.
 393 func (s *ss) accept(ok string) bool {
 394         if s.wid >= s.maxWid {
 395                 return false
 396         }
 397         rune := s.getRune()
 398         if rune == EOF {
 399                 return false
 400         }
 401         for i := 0; i < len(ok); i++ {
 402                 if int(ok[i]) == rune {
 403                         s.buf.WriteRune(rune)
 404                         s.wid++
 405                         return true
 406                 }
 407         }
 408         if rune != EOF {
 409                 s.UngetRune()
 410         }
 411         return false
 412 }
 413
 414 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 415 func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
 416         for _, v := range okVerbs {
 417                 if v == verb {
 418                         return true
 419                 }
 420         }
 421         s.errorString("bad verb %" + string(verb) + " for " + typ)
 422         return false
 423 }
 424
 425 // scanBool returns the value of the boolean represented by the next token.
 426 func (s *ss) scanBool(verb int) bool {
 427         if !s.okVerb(verb, "tv", "boolean") {
 428                 return false
 429         }
 430         // Syntax-checking a boolean is annoying.  We're not fastidious about case.
 431         switch s.mustGetRune() {
 432         case '0':
 433                 return false
 434         case '1':
 435                 return true
 436         case 't', 'T':
 437                 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 438                         s.error(boolError)
 439                 }
 440                 return true
 441         case 'f', 'F':
 442                 if s.accept("aL") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 443                         s.error(boolError)
 444                 }
 445                 return false
 446         }
 447         return false
 448 }
 449
// Numerical elements.  Each constant is a set of characters meant to be
// handed to accept, which matches the next input rune against them.
const (
        binaryDigits      = "01"
        octalDigits       = "01234567"
        decimalDigits     = "0123456789"
        hexadecimalDigits = "0123456789aAbBcCdDeEfF"
        sign              = "+-"
        period            = "."
        exponent          = "eE"
)
 460
 461 // getBase returns the numeric base represented by the verb and its digit string.
 462 func (s *ss) getBase(verb int) (base int, digits string) {
 463         s.okVerb(verb, "bdoxXv", "integer") // sets s.err
 464         base = 10
 465         digits = decimalDigits
 466         switch verb {
 467         case 'b':
 468                 base = 2
 469                 digits = binaryDigits
 470         case 'o':
 471                 base = 8
 472                 digits = octalDigits
 473         case 'x', 'X':
 474                 base = 16
 475                 digits = hexadecimalDigits
 476         }
 477         return
 478 }
 479
 480 // scanNumber returns the numerical string with specified digits starting here.
 481 func (s *ss) scanNumber(digits string) string {
 482         if !s.accept(digits) {
 483                 s.errorString("expected integer")
 484         }
 485         for s.accept(digits) {
 486         }
 487         return s.buf.String()
 488 }
 489
 490 // scanRune returns the next rune value in the input.
 491 func (s *ss) scanRune(bitSize int) int64 {
 492         rune := int64(s.mustGetRune())
 493         n := uint(bitSize)
 494         x := (rune << (64 - n)) >> (64 - n)
 495         if x != rune {
 496                 s.errorString("overflow on character value " + string(rune))
 497         }
 498         return rune
 499 }
 500
 501 // scanInt returns the value of the integer represented by the next
 502 // token, checking for overflow.  Any error is stored in s.err.
 503 func (s *ss) scanInt(verb int, bitSize int) int64 {
 504         if verb == 'c' {
 505                 return s.scanRune(bitSize)
 506         }
 507         base, digits := s.getBase(verb)
 508         s.skipSpace(false)
 509         s.accept(sign) // If there's a sign, it will be left in the token buffer.
 510         tok := s.scanNumber(digits)
 511         i, err := strconv.Btoi64(tok, base)
 512         if err != nil {
 513                 s.error(err)
 514         }
 515         n := uint(bitSize)
 516         x := (i << (64 - n)) >> (64 - n)
 517         if x != i {
 518                 s.errorString("integer overflow on token " + tok)
 519         }
 520         return i
 521 }
 522
 523 // scanUint returns the value of the unsigned integer represented
 524 // by the next token, checking for overflow.  Any error is stored in s.err.
 525 func (s *ss) scanUint(verb int, bitSize int) uint64 {
 526         if verb == 'c' {
 527                 return uint64(s.scanRune(bitSize))
 528         }
 529         base, digits := s.getBase(verb)
 530         s.skipSpace(false)
 531         tok := s.scanNumber(digits)
 532         i, err := strconv.Btoui64(tok, base)
 533         if err != nil {
 534                 s.error(err)
 535         }
 536         n := uint(bitSize)
 537         x := (i << (64 - n)) >> (64 - n)
 538         if x != i {
 539                 s.errorString("unsigned integer overflow on token " + tok)
 540         }
 541         return i
 542 }
 543
 544 // floatToken returns the floating-point number starting here, no longer than swid
 545 // if the width is specified. It's not rigorous about syntax because it doesn't check that
 546 // we have at least some digits, but Atof will do that.
 547 func (s *ss) floatToken() string {
 548         s.buf.Reset()
 549         // leading sign?
 550         s.accept(sign)
 551         // digits?
 552         for s.accept(decimalDigits) {
 553         }
 554         // decimal point?
 555         if s.accept(period) {
 556                 // fraction?
 557                 for s.accept(decimalDigits) {
 558                 }
 559         }
 560         // exponent?
 561         if s.accept(exponent) {
 562                 // leading sign?
 563                 s.accept(sign)
 564                 // digits?
 565                 for s.accept(decimalDigits) {
 566                 }
 567         }
 568         return s.buf.String()
 569 }
 570
 571 // complexTokens returns the real and imaginary parts of the complex number starting here.
 572 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
 573 // number and there are no spaces within.
 574 func (s *ss) complexTokens() (real, imag string) {
 575         // TODO: accept N and Ni independently?
 576         parens := s.accept("(")
 577         real = s.floatToken()
 578         s.buf.Reset()
 579         // Must now have a sign.
 580         if !s.accept("+-") {
 581                 s.error(complexError)
 582         }
 583         // Sign is now in buffer
 584         imagSign := s.buf.String()
 585         imag = s.floatToken()
 586         if !s.accept("i") {
 587                 s.error(complexError)
 588         }
 589         if parens && !s.accept(")") {
 590                 s.error(complexError)
 591         }
 592         return real, imagSign + imag
 593 }
 594
 595 // convertFloat converts the string to a float64value.
 596 func (s *ss) convertFloat(str string, n int) float64 {
 597         f, err := strconv.AtofN(str, n)
 598         if err != nil {
 599                 s.error(err)
 600         }
 601         return f
 602 }
 603
 604 // convertComplex converts the next token to a complex128 value.
 605 // The atof argument is a type-specific reader for the underlying type.
 606 // If we're reading complex64, atof will parse float32s and convert them
 607 // to float64's to avoid reproducing this code for each complex type.
 608 func (s *ss) scanComplex(verb int, n int) complex128 {
 609         if !s.okVerb(verb, floatVerbs, "complex") {
 610                 return 0
 611         }
 612         s.skipSpace(false)
 613         sreal, simag := s.complexTokens()
 614         real := s.convertFloat(sreal, n/2)
 615         imag := s.convertFloat(simag, n/2)
 616         return cmplx(real, imag)
 617 }
 618
 619 // convertString returns the string represented by the next input characters.
 620 // The format of the input is determined by the verb.
 621 func (s *ss) convertString(verb int) (str string) {
 622         if !s.okVerb(verb, "svqx", "string") {
 623                 return ""
 624         }
 625         s.skipSpace(false)
 626         switch verb {
 627         case 'q':
 628                 str = s.quotedString()
 629         case 'x':
 630                 str = s.hexString()
 631         default:
 632                 str = s.token() // %s and %v just return the next word
 633         }
 634         // Empty strings other than with %q are not OK.
 635         if len(str) == 0 && verb != 'q' && s.maxWid > 0 {
 636                 s.errorString("Scan: no data for string")
 637         }
 638         return
 639 }
 640
 641 // quotedString returns the double- or back-quoted string represented by the next input characters.
 642 func (s *ss) quotedString() string {
 643         quote := s.mustGetRune()
 644         switch quote {
 645         case '`':
 646                 // Back-quoted: Anything goes until EOF or back quote.
 647                 for {
 648                         rune := s.mustGetRune()
 649                         if rune == quote {
 650                                 break
 651                         }
 652                         s.buf.WriteRune(rune)
 653                 }
 654                 return s.buf.String()
 655         case '"':
 656                 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 657                 s.buf.WriteRune(quote)
 658                 for {
 659                         rune := s.mustGetRune()
 660                         s.buf.WriteRune(rune)
 661                         if rune == '\\' {
 662                                 // In a legal backslash escape, no matter how long, only the character
 663                                 // immediately after the escape can itself be a backslash or quote.
 664                                 // Thus we only need to protect the first character after the backslash.
 665                                 rune := s.mustGetRune()
 666                                 s.buf.WriteRune(rune)
 667                         } else if rune == '"' {
 668                                 break
 669                         }
 670                 }
 671                 result, err := strconv.Unquote(s.buf.String())
 672                 if err != nil {
 673                         s.error(err)
 674                 }
 675                 return result
 676         default:
 677                 s.errorString("expected quoted string")
 678         }
 679         return ""
 680 }
 681
 682 // hexDigit returns the value of the hexadecimal digit
 683 func (s *ss) hexDigit(digit int) int {
 684         switch digit {
 685         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 686                 return digit - '0'
 687         case 'a', 'b', 'c', 'd', 'e', 'f':
 688                 return 10 + digit - 'a'
 689         case 'A', 'B', 'C', 'D', 'E', 'F':
 690                 return 10 + digit - 'A'
 691         }
 692         s.errorString("Scan: illegal hex digit")
 693         return 0
 694 }
 695
 696 // hexByte returns the next hex-encoded (two-character) byte from the input.
 697 // There must be either two hexadecimal digits or a space character in the input.
 698 func (s *ss) hexByte() (b byte, ok bool) {
 699         rune1 := s.getRune()
 700         if rune1 == EOF {
 701                 return
 702         }
 703         if unicode.IsSpace(rune1) {
 704                 s.UngetRune()
 705                 return
 706         }
 707         rune2 := s.mustGetRune()
 708         return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
 709 }
 710
 711 // hexString returns the space-delimited hexpair-encoded string.
 712 func (s *ss) hexString() string {
 713         for {
 714                 b, ok := s.hexByte()
 715                 if !ok {
 716                         break
 717                 }
 718                 s.buf.WriteByte(b)
 719         }
 720         if s.buf.Len() == 0 {
 721                 s.errorString("Scan: no hex data for %x string")
 722                 return ""
 723         }
 724         return s.buf.String()
 725 }
 726
// floatVerbs lists the verbs accepted when scanning floating-point and
// complex values.
const floatVerbs = "eEfFgGv"
 728
// scanOne scans a single value, deriving the scanner from the type of the argument.
// The token buffer is cleared first.  An argument that implements Scanner
// is handed the scan state and does its own scanning; pointers to the
// basic types are handled directly; everything else goes through
// reflection.  All failures abort the scan by panicking with a scanError.
func (s *ss) scanOne(verb int, field interface{}) {
        s.buf.Reset()
        var err os.Error
        // If the parameter has its own Scan method, use that.
        if v, ok := field.(Scanner); ok {
                err = v.Scan(s, verb)
                if err != nil {
                        s.error(err)
                }
                return
        }
        // accept compares the width consumed against maxWid, so when no
        // width was given maxWid is made large enough never to get in the way.
        if !s.widPresent {
                s.maxWid = 1 << 30 // Huge
        }
        s.wid = 0
        switch v := field.(type) {
        case *bool:
                *v = s.scanBool(verb)
        case *complex:
                *v = complex(s.scanComplex(verb, int(complexBits)))
        case *complex64:
                *v = complex64(s.scanComplex(verb, 64))
        case *complex128:
                *v = s.scanComplex(verb, 128)
        case *int:
                *v = int(s.scanInt(verb, intBits))
        case *int8:
                *v = int8(s.scanInt(verb, 8))
        case *int16:
                *v = int16(s.scanInt(verb, 16))
        case *int32:
                *v = int32(s.scanInt(verb, 32))
        case *int64:
                *v = s.scanInt(verb, 64)
        case *uint:
                *v = uint(s.scanUint(verb, intBits))
        case *uint8:
                *v = uint8(s.scanUint(verb, 8))
        case *uint16:
                *v = uint16(s.scanUint(verb, 16))
        case *uint32:
                *v = uint32(s.scanUint(verb, 32))
        case *uint64:
                *v = s.scanUint(verb, 64)
        case *uintptr:
                *v = uintptr(s.scanUint(verb, uintptrBits))
        // Floats are tricky because you want to scan in the precision of the result, not
        // scan in high precision and convert, in order to preserve the correct error condition.
        case *float:
                if s.okVerb(verb, floatVerbs, "float") {
                        s.skipSpace(false)
                        *v = float(s.convertFloat(s.floatToken(), int(floatBits)))
                }
        case *float32:
                if s.okVerb(verb, floatVerbs, "float32") {
                        s.skipSpace(false)
                        *v = float32(s.convertFloat(s.floatToken(), 32))
                }
        case *float64:
                if s.okVerb(verb, floatVerbs, "float64") {
                        s.skipSpace(false)
                        *v = s.convertFloat(s.floatToken(), 64)
                }
        case *string:
                *v = s.convertString(verb)
        case *[]byte:
                // We scan to string and convert so we get a copy of the data.
                // If we scanned to bytes, the slice would point at the buffer.
                *v = []byte(s.convertString(verb))
        default:
                // Not a pointer to a basic type: inspect the argument through
                // reflection, which also covers named ("renamed") types.
                val := reflect.NewValue(v)
                ptr, ok := val.(*reflect.PtrValue)
                if !ok {
                        s.errorString("Scan: type not a pointer: " + val.Type().String())
                        return
                }
                switch v := ptr.Elem().(type) {
                case *reflect.BoolValue:
                        v.Set(s.scanBool(verb))
                case *reflect.IntValue:
                        v.Set(s.scanInt(verb, v.Type().Bits()))
                case *reflect.UintValue:
                        v.Set(s.scanUint(verb, v.Type().Bits()))
                case *reflect.StringValue:
                        v.Set(s.convertString(verb))
                case *reflect.SliceValue:
                        // For now, can only handle (renamed) []byte.
                        typ := v.Type().(*reflect.SliceType)
                        if typ.Elem().Kind() != reflect.Uint8 {
                                goto CantHandle
                        }
                        str := s.convertString(verb)
                        v.Set(reflect.MakeSlice(typ, len(str), len(str)))
                        // Copy the scanned bytes into the new slice one element at a time.
                        for i := 0; i < len(str); i++ {
                                v.Elem(i).(*reflect.UintValue).Set(uint64(str[i]))
                        }
                case *reflect.FloatValue:
                        // NOTE(review): unlike the *float32/*float64 cases above,
                        // the verb is not checked against floatVerbs here.
                        s.skipSpace(false)
                        v.Set(s.convertFloat(s.floatToken(), v.Type().Bits()))
                case *reflect.ComplexValue:
                        v.Set(s.scanComplex(verb, v.Type().Bits()))
                default:
                CantHandle:
                        s.errorString("Scan: can't handle type: " + val.Type().String())
                }
        }
}
 837
 838 // errorHandler turns local panics into error returns.  EOFs are benign.
 839 func errorHandler(errp *os.Error) {
 840         if e := recover(); e != nil {
 841                 if se, ok := e.(scanError); ok { // catch local error
 842                         if se.err != os.EOF {
 843                                 *errp = se.err
 844                         }
 845                 } else {
 846                         panic(e)
 847                 }
 848         }
 849 }
 850
 851 // doScan does the real work for scanning without a format string.
 852 // At the moment, it handles only pointers to basic types.
 853 func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) {
 854         defer errorHandler(&err)
 855         for _, field := range a {
 856                 s.scanOne('v', field)
 857                 numProcessed++
 858         }
 859         // Check for newline if required.
 860         if !s.nlIsSpace {
 861                 for {
 862                         rune := s.getRune()
 863                         if rune == '\n' || rune == EOF {
 864                                 break
 865                         }
 866                         if !unicode.IsSpace(rune) {
 867                                 s.errorString("Scan: expected newline")
 868                                 break
 869                         }
 870                 }
 871         }
 872         return
 873 }
 874
 875 // advance determines whether the next characters in the input match
 876 // those of the format.  It returns the number of bytes (sic) consumed
 877 // in the format. Newlines included, all runs of space characters in
 878 // either input or format behave as a single space. This routine also
 879 // handles the %% case.  If the return value is zero, either format
 880 // starts with a % (with no following %) or the input is empty.
 881 // If it is negative, the input did not match the string.
 882 func (s *ss) advance(format string) (i int) {
 883         for i < len(format) {
 884                 fmtc, w := utf8.DecodeRuneInString(format[i:])
 885                 if fmtc == '%' {
 886                         // %% acts like a real percent
 887                         nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
 888                         if nextc != '%' {
 889                                 return
 890                         }
 891                         i += w // skip the first %
 892                 }
 893                 sawSpace := false
 894                 for unicode.IsSpace(fmtc) && i < len(format) {
 895                         sawSpace = true
 896                         i += w
 897                         fmtc, w = utf8.DecodeRuneInString(format[i:])
 898                 }
 899                 if sawSpace {
 900                         // There was space in the format, so there should be space (EOF)
 901                         // in the input.
 902                         inputc := s.getRune()
 903                         if inputc == EOF {
 904                                 return
 905                         }
 906                         if !unicode.IsSpace(inputc) {
 907                                 // Space in format but not in input: error
 908                                 s.errorString("expected space in input to match format")
 909                         }
 910                         s.skipSpace(true)
 911                         continue
 912                 }
 913                 inputc := s.mustGetRune()
 914                 if fmtc != inputc {
 915                         s.UngetRune()
 916                         return -1
 917                 }
 918                 i += w
 919         }
 920         return
 921 }
 922
 923 // doScanf does the real work when scanning with a format string.
 924 //  At the moment, it handles only pointers to basic types.
 925 func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.Error) {
 926         defer errorHandler(&err)
 927         end := len(format) - 1
 928         // We process one item per non-trivial format
 929         for i := 0; i <= end; {
 930                 w := s.advance(format[i:])
 931                 if w > 0 {
 932                         i += w
 933                         continue
 934                 }
 935                 // Either we failed to advance, we have a percent character, or we ran out of input.
 936                 if format[i] != '%' {
 937                         // Can't advance format.  Why not?
 938                         if w < 0 {
 939                                 s.errorString("input does not match format")
 940                         }
 941                         // Otherwise at EOF; "too many operands" error handled below
 942                         break
 943                 }
 944                 i++ // % is one byte
 945
 946                 // do we have 20 (width)?
 947                 s.maxWid, s.widPresent, i = parsenum(format, i, end)
 948
 949                 c, w := utf8.DecodeRuneInString(format[i:])
 950                 i += w
 951
 952                 if numProcessed >= len(a) { // out of operands
 953                         s.errorString("too few operands for format %" + format[i-w:])
 954                         break
 955                 }
 956                 field := a[numProcessed]
 957
 958                 s.scanOne(c, field)
 959                 numProcessed++
 960         }
 961         if numProcessed < len(a) {
 962                 s.errorString("too many operands")
 963         }
 964         return
 965 }