atoi.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package strconv
  5. import "errors"
  6. // lower(c) is a lower-case letter if and only if
  7. // c is either that lower-case letter or the equivalent upper-case letter.
  8. // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
  9. // Note that lower of non-letters can produce other non-letters.
  10. func lower(c byte) byte {
  11. return c | ('x' - 'X')
  12. }
  13. // ErrRange indicates that a value is out of range for the target type.
  14. var ErrRange = errors.New("value out of range")
  15. // ErrSyntax indicates that a value does not have the right syntax for the target type.
  16. var ErrSyntax = errors.New("invalid syntax")
  17. // A NumError records a failed conversion.
  18. type NumError struct {
  19. Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex)
  20. Num string // the input
  21. Err error // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
  22. }
  23. func (e *NumError) Error() string {
  24. return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
  25. }
  26. func (e *NumError) Unwrap() error { return e.Err }
  27. func syntaxError(fn, str string) *NumError {
  28. return &NumError{fn, str, ErrSyntax}
  29. }
  30. func rangeError(fn, str string) *NumError {
  31. return &NumError{fn, str, ErrRange}
  32. }
  33. func baseError(fn, str string, base int) *NumError {
  34. return &NumError{fn, str, errors.New("invalid base " + Itoa(base))}
  35. }
  36. func bitSizeError(fn, str string, bitSize int) *NumError {
  37. return &NumError{fn, str, errors.New("invalid bit size " + Itoa(bitSize))}
  38. }
  39. const intSize = 32 << (^uint(0) >> 63)
  40. // IntSize is the size in bits of an int or uint value.
  41. const IntSize = intSize
  42. const maxUint64 = 1<<64 - 1
  43. // ParseUint is like ParseInt but for unsigned numbers.
  44. //
  45. // A sign prefix is not permitted.
  46. func ParseUint(s string, base int, bitSize int) (uint64, error) {
  47. const fnParseUint = "ParseUint"
  48. if s == "" {
  49. return 0, syntaxError(fnParseUint, s)
  50. }
  51. base0 := base == 0
  52. s0 := s
  53. switch {
  54. case 2 <= base && base <= 36:
  55. // valid base; nothing to do
  56. case base == 0:
  57. // Look for octal, hex prefix.
  58. base = 10
  59. if s[0] == '0' {
  60. switch {
  61. case len(s) >= 3 && lower(s[1]) == 'b':
  62. base = 2
  63. s = s[2:]
  64. case len(s) >= 3 && lower(s[1]) == 'o':
  65. base = 8
  66. s = s[2:]
  67. case len(s) >= 3 && lower(s[1]) == 'x':
  68. base = 16
  69. s = s[2:]
  70. default:
  71. base = 8
  72. s = s[1:]
  73. }
  74. }
  75. default:
  76. return 0, baseError(fnParseUint, s0, base)
  77. }
  78. if bitSize == 0 {
  79. bitSize = IntSize
  80. } else if bitSize < 0 || bitSize > 64 {
  81. return 0, bitSizeError(fnParseUint, s0, bitSize)
  82. }
  83. // Cutoff is the smallest number such that cutoff*base > maxUint64.
  84. // Use compile-time constants for common cases.
  85. var cutoff uint64
  86. switch base {
  87. case 10:
  88. cutoff = maxUint64/10 + 1
  89. case 16:
  90. cutoff = maxUint64/16 + 1
  91. default:
  92. cutoff = maxUint64/uint64(base) + 1
  93. }
  94. maxVal := uint64(1)<<uint(bitSize) - 1
  95. underscores := false
  96. var n uint64
  97. for _, c := range []byte(s) {
  98. var d byte
  99. switch {
  100. case c == '_' && base0:
  101. underscores = true
  102. continue
  103. case '0' <= c && c <= '9':
  104. d = c - '0'
  105. case 'a' <= lower(c) && lower(c) <= 'z':
  106. d = lower(c) - 'a' + 10
  107. default:
  108. return 0, syntaxError(fnParseUint, s0)
  109. }
  110. if d >= byte(base) {
  111. return 0, syntaxError(fnParseUint, s0)
  112. }
  113. if n >= cutoff {
  114. // n*base overflows
  115. return maxVal, rangeError(fnParseUint, s0)
  116. }
  117. n *= uint64(base)
  118. n1 := n + uint64(d)
  119. if n1 < n || n1 > maxVal {
  120. // n+d overflows
  121. return maxVal, rangeError(fnParseUint, s0)
  122. }
  123. n = n1
  124. }
  125. if underscores && !underscoreOK(s0) {
  126. return 0, syntaxError(fnParseUint, s0)
  127. }
  128. return n, nil
  129. }
  130. // ParseInt interprets a string s in the given base (0, 2 to 36) and
  131. // bit size (0 to 64) and returns the corresponding value i.
  132. //
  133. // The string may begin with a leading sign: "+" or "-".
  134. //
  135. // If the base argument is 0, the true base is implied by the string's
  136. // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
  137. // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
  138. // underscore characters are permitted as defined by the Go syntax for
  139. // integer literals.
  140. //
  141. // The bitSize argument specifies the integer type
  142. // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
  143. // correspond to int, int8, int16, int32, and int64.
  144. // If bitSize is below 0 or above 64, an error is returned.
  145. //
  146. // The errors that ParseInt returns have concrete type *NumError
  147. // and include err.Num = s. If s is empty or contains invalid
  148. // digits, err.Err = ErrSyntax and the returned value is 0;
  149. // if the value corresponding to s cannot be represented by a
  150. // signed integer of the given size, err.Err = ErrRange and the
  151. // returned value is the maximum magnitude integer of the
  152. // appropriate bitSize and sign.
  153. func ParseInt(s string, base int, bitSize int) (i int64, err error) {
  154. const fnParseInt = "ParseInt"
  155. if s == "" {
  156. return 0, syntaxError(fnParseInt, s)
  157. }
  158. // Pick off leading sign.
  159. s0 := s
  160. neg := false
  161. if s[0] == '+' {
  162. s = s[1:]
  163. } else if s[0] == '-' {
  164. neg = true
  165. s = s[1:]
  166. }
  167. // Convert unsigned and check range.
  168. var un uint64
  169. un, err = ParseUint(s, base, bitSize)
  170. if err != nil && err.(*NumError).Err != ErrRange {
  171. err.(*NumError).Func = fnParseInt
  172. err.(*NumError).Num = s0
  173. return 0, err
  174. }
  175. if bitSize == 0 {
  176. bitSize = IntSize
  177. }
  178. cutoff := uint64(1 << uint(bitSize-1))
  179. if !neg && un >= cutoff {
  180. return int64(cutoff - 1), rangeError(fnParseInt, s0)
  181. }
  182. if neg && un > cutoff {
  183. return -int64(cutoff), rangeError(fnParseInt, s0)
  184. }
  185. n := int64(un)
  186. if neg {
  187. n = -n
  188. }
  189. return n, nil
  190. }
  191. // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
  192. func Atoi(s string) (int, error) {
  193. const fnAtoi = "Atoi"
  194. sLen := len(s)
  195. if intSize == 32 && (0 < sLen && sLen < 10) ||
  196. intSize == 64 && (0 < sLen && sLen < 19) {
  197. // Fast path for small integers that fit int type.
  198. s0 := s
  199. if s[0] == '-' || s[0] == '+' {
  200. s = s[1:]
  201. if len(s) < 1 {
  202. return 0, &NumError{fnAtoi, s0, ErrSyntax}
  203. }
  204. }
  205. n := 0
  206. for _, ch := range []byte(s) {
  207. ch -= '0'
  208. if ch > 9 {
  209. return 0, &NumError{fnAtoi, s0, ErrSyntax}
  210. }
  211. n = n*10 + int(ch)
  212. }
  213. if s0[0] == '-' {
  214. n = -n
  215. }
  216. return n, nil
  217. }
  218. // Slow path for invalid, big, or underscored integers.
  219. i64, err := ParseInt(s, 10, 0)
  220. if nerr, ok := err.(*NumError); ok {
  221. nerr.Func = fnAtoi
  222. }
  223. return int(i64), err
  224. }
  225. // underscoreOK reports whether the underscores in s are allowed.
  226. // Checking them in this one function lets all the parsers skip over them simply.
  227. // Underscore must appear only between digits or between a base prefix and a digit.
  228. func underscoreOK(s string) bool {
  229. // saw tracks the last character (class) we saw:
  230. // ^ for beginning of number,
  231. // 0 for a digit or base prefix,
  232. // _ for an underscore,
  233. // ! for none of the above.
  234. saw := '^'
  235. i := 0
  236. // Optional sign.
  237. if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
  238. s = s[1:]
  239. }
  240. // Optional base prefix.
  241. hex := false
  242. if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
  243. i = 2
  244. saw = '0' // base prefix counts as a digit for "underscore as digit separator"
  245. hex = lower(s[1]) == 'x'
  246. }
  247. // Number proper.
  248. for ; i < len(s); i++ {
  249. // Digits are always okay.
  250. if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
  251. saw = '0'
  252. continue
  253. }
  254. // Underscore must follow digit.
  255. if s[i] == '_' {
  256. if saw != '0' {
  257. return false
  258. }
  259. saw = '_'
  260. continue
  261. }
  262. // Underscore must also be followed by digit.
  263. if saw == '_' {
  264. return false
  265. }
  266. // Saw non-digit, non-underscore.
  267. saw = '!'
  268. }
  269. return saw != '_'
  270. }