// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
- package strconv
- import "errors"
// lower folds an ASCII letter to lower case by forcing its case bit on.
// For a letter c the result is the lower-case form of c, so a test such as
// c == 'x' || c == 'X' can be written lower(c) == 'x'.
// Bytes that are not letters may be mapped to other non-letter bytes.
func lower(c byte) byte {
	// caseBit is the single bit in which ASCII upper- and lower-case
	// letters differ.
	const caseBit = 'x' - 'X'
	return c | caseBit
}
// Sentinel errors describing why a conversion failed.
var (
	// ErrRange indicates that a value is out of range for the target type.
	ErrRange = errors.New("value out of range")

	// ErrSyntax indicates that a value does not have the right syntax for the target type.
	ErrSyntax = errors.New("invalid syntax")
)
// A NumError records a failed conversion: which function failed,
// the input it was given, and why the conversion was rejected.
type NumError struct {
	// Func is the failing function
	// (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex).
	Func string
	// Num is the input.
	Num string
	// Err is the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.).
	Err error
}
- func (e *NumError) Error() string {
- return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
- }
- func (e *NumError) Unwrap() error { return e.Err }
- func syntaxError(fn, str string) *NumError {
- return &NumError{fn, str, ErrSyntax}
- }
- func rangeError(fn, str string) *NumError {
- return &NumError{fn, str, ErrRange}
- }
- func baseError(fn, str string, base int) *NumError {
- return &NumError{fn, str, errors.New("invalid base " + Itoa(base))}
- }
- func bitSizeError(fn, str string, bitSize int) *NumError {
- return &NumError{fn, str, errors.New("invalid bit size " + Itoa(bitSize))}
- }
const (
	// intSize is the width of a uint in bits. The shift count
	// ^uint(0)>>63 is 1 when uint has 64 bits and 0 when it has 32,
	// so the expression evaluates to 64 or 32 respectively.
	intSize = 32 << (^uint(0) >> 63)

	// IntSize is the size in bits of an int or uint value.
	IntSize = intSize

	// maxUint64 is the largest value a uint64 can hold.
	maxUint64 = 1<<64 - 1
)
- // ParseUint is like ParseInt but for unsigned numbers.
- //
- // A sign prefix is not permitted.
- func ParseUint(s string, base int, bitSize int) (uint64, error) {
- const fnParseUint = "ParseUint"
- if s == "" {
- return 0, syntaxError(fnParseUint, s)
- }
- base0 := base == 0
- s0 := s
- switch {
- case 2 <= base && base <= 36:
- // valid base; nothing to do
- case base == 0:
- // Look for octal, hex prefix.
- base = 10
- if s[0] == '0' {
- switch {
- case len(s) >= 3 && lower(s[1]) == 'b':
- base = 2
- s = s[2:]
- case len(s) >= 3 && lower(s[1]) == 'o':
- base = 8
- s = s[2:]
- case len(s) >= 3 && lower(s[1]) == 'x':
- base = 16
- s = s[2:]
- default:
- base = 8
- s = s[1:]
- }
- }
- default:
- return 0, baseError(fnParseUint, s0, base)
- }
- if bitSize == 0 {
- bitSize = IntSize
- } else if bitSize < 0 || bitSize > 64 {
- return 0, bitSizeError(fnParseUint, s0, bitSize)
- }
- // Cutoff is the smallest number such that cutoff*base > maxUint64.
- // Use compile-time constants for common cases.
- var cutoff uint64
- switch base {
- case 10:
- cutoff = maxUint64/10 + 1
- case 16:
- cutoff = maxUint64/16 + 1
- default:
- cutoff = maxUint64/uint64(base) + 1
- }
- maxVal := uint64(1)<<uint(bitSize) - 1
- underscores := false
- var n uint64
- for _, c := range []byte(s) {
- var d byte
- switch {
- case c == '_' && base0:
- underscores = true
- continue
- case '0' <= c && c <= '9':
- d = c - '0'
- case 'a' <= lower(c) && lower(c) <= 'z':
- d = lower(c) - 'a' + 10
- default:
- return 0, syntaxError(fnParseUint, s0)
- }
- if d >= byte(base) {
- return 0, syntaxError(fnParseUint, s0)
- }
- if n >= cutoff {
- // n*base overflows
- return maxVal, rangeError(fnParseUint, s0)
- }
- n *= uint64(base)
- n1 := n + uint64(d)
- if n1 < n || n1 > maxVal {
- // n+d overflows
- return maxVal, rangeError(fnParseUint, s0)
- }
- n = n1
- }
- if underscores && !underscoreOK(s0) {
- return 0, syntaxError(fnParseUint, s0)
- }
- return n, nil
- }
- // ParseInt interprets a string s in the given base (0, 2 to 36) and
- // bit size (0 to 64) and returns the corresponding value i.
- //
- // The string may begin with a leading sign: "+" or "-".
- //
- // If the base argument is 0, the true base is implied by the string's
- // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
- // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
- // underscore characters are permitted as defined by the Go syntax for
- // integer literals.
- //
- // The bitSize argument specifies the integer type
- // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
- // correspond to int, int8, int16, int32, and int64.
- // If bitSize is below 0 or above 64, an error is returned.
- //
- // The errors that ParseInt returns have concrete type *NumError
- // and include err.Num = s. If s is empty or contains invalid
- // digits, err.Err = ErrSyntax and the returned value is 0;
- // if the value corresponding to s cannot be represented by a
- // signed integer of the given size, err.Err = ErrRange and the
- // returned value is the maximum magnitude integer of the
- // appropriate bitSize and sign.
- func ParseInt(s string, base int, bitSize int) (i int64, err error) {
- const fnParseInt = "ParseInt"
- if s == "" {
- return 0, syntaxError(fnParseInt, s)
- }
- // Pick off leading sign.
- s0 := s
- neg := false
- if s[0] == '+' {
- s = s[1:]
- } else if s[0] == '-' {
- neg = true
- s = s[1:]
- }
- // Convert unsigned and check range.
- var un uint64
- un, err = ParseUint(s, base, bitSize)
- if err != nil && err.(*NumError).Err != ErrRange {
- err.(*NumError).Func = fnParseInt
- err.(*NumError).Num = s0
- return 0, err
- }
- if bitSize == 0 {
- bitSize = IntSize
- }
- cutoff := uint64(1 << uint(bitSize-1))
- if !neg && un >= cutoff {
- return int64(cutoff - 1), rangeError(fnParseInt, s0)
- }
- if neg && un > cutoff {
- return -int64(cutoff), rangeError(fnParseInt, s0)
- }
- n := int64(un)
- if neg {
- n = -n
- }
- return n, nil
- }
- // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
- func Atoi(s string) (int, error) {
- const fnAtoi = "Atoi"
- sLen := len(s)
- if intSize == 32 && (0 < sLen && sLen < 10) ||
- intSize == 64 && (0 < sLen && sLen < 19) {
- // Fast path for small integers that fit int type.
- s0 := s
- if s[0] == '-' || s[0] == '+' {
- s = s[1:]
- if len(s) < 1 {
- return 0, &NumError{fnAtoi, s0, ErrSyntax}
- }
- }
- n := 0
- for _, ch := range []byte(s) {
- ch -= '0'
- if ch > 9 {
- return 0, &NumError{fnAtoi, s0, ErrSyntax}
- }
- n = n*10 + int(ch)
- }
- if s0[0] == '-' {
- n = -n
- }
- return n, nil
- }
- // Slow path for invalid, big, or underscored integers.
- i64, err := ParseInt(s, 10, 0)
- if nerr, ok := err.(*NumError); ok {
- nerr.Func = fnAtoi
- }
- return int(i64), err
- }
// underscoreOK reports whether every underscore in s sits in a permitted
// position. The parsers simply skip underscores while reading digits and
// rely on this one function to validate their placement.
// An underscore is permitted only between two digits or between a base
// prefix and a digit.
func underscoreOK(s string) bool {
	// prev classifies the most recently consumed character.
	const (
		// atStart: nothing of the number has been consumed yet.
		atStart = iota
		// afterDigit: a digit or a base prefix.
		afterDigit
		// afterUnderscore: an underscore.
		afterUnderscore
		// afterOther: none of the above.
		afterOther
	)
	prev := atStart

	// Optional sign.
	if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
		s = s[1:]
	}

	// Optional base prefix: "0b", "0o", or "0x" in either case.
	isHex := false
	if len(s) >= 2 && s[0] == '0' {
		switch s[1] {
		case 'b', 'B', 'o', 'O', 'x', 'X':
			isHex = s[1] == 'x' || s[1] == 'X'
			s = s[2:]
			// The prefix counts as a digit, so an underscore may follow it.
			prev = afterDigit
		}
	}

	// Number proper.
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case '0' <= c && c <= '9',
			isHex && ('a' <= c && c <= 'f' || 'A' <= c && c <= 'F'):
			// Digits are always okay.
			prev = afterDigit
		case c == '_':
			// Underscore must follow digit.
			if prev != afterDigit {
				return false
			}
			prev = afterUnderscore
		default:
			// Underscore must also be followed by digit.
			if prev == afterUnderscore {
				return false
			}
			prev = afterOther
		}
	}

	// A trailing underscore is not allowed.
	return prev != afterUnderscore
}
|