atof.go 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package strconv
  5. // decimal to binary floating point conversion.
  6. // Algorithm:
  7. // 1) Store input in multiprecision decimal.
  8. // 2) Multiply/divide decimal by powers of two until in range [0.5, 1)
  9. // 3) Multiply by 2^precision and round to get mantissa.
  10. import "math"
  11. import "runtime"
  // optimize enables the fast conversion paths tried by atof32 and atof64.
  // Tests set it to false to force every input through the slow-path conversion.
  var (
  	optimize = true
  )
  // commonPrefixLenIgnoreCase reports how many leading bytes of s agree with
  // prefix once ASCII upper-case letters in s are folded to lower case.
  // Bytes of prefix are compared as-is, so prefix must already be lower-case.
  func commonPrefixLenIgnoreCase(s, prefix string) int {
  	limit := len(s)
  	if len(prefix) < limit {
  		limit = len(prefix)
  	}
  	for pos := 0; pos < limit; pos++ {
  		ch := s[pos]
  		if ch >= 'A' && ch <= 'Z' {
  			// Fold ASCII upper case to lower case.
  			ch += 'a' - 'A'
  		}
  		if prefix[pos] != ch {
  			return pos
  		}
  	}
  	return limit
  }
  32. // special returns the floating-point value for the special,
  33. // possibly signed floating-point representations inf, infinity,
  34. // and NaN. The result is ok if a prefix of s contains one
  35. // of these representations and n is the length of that prefix.
  36. // The character case is ignored.
  37. func special(s string) (f float64, n int, ok bool) {
  38. if len(s) == 0 {
  39. return 0, 0, false
  40. }
  41. sign := 1
  42. nsign := 0
  43. switch s[0] {
  44. case '+', '-':
  45. if s[0] == '-' {
  46. sign = -1
  47. }
  48. nsign = 1
  49. s = s[1:]
  50. fallthrough
  51. case 'i', 'I':
  52. n := commonPrefixLenIgnoreCase(s, "infinity")
  53. // Anything longer than "inf" is ok, but if we
  54. // don't have "infinity", only consume "inf".
  55. if 3 < n && n < 8 {
  56. n = 3
  57. }
  58. if n == 3 || n == 8 {
  59. return math.Inf(sign), nsign + n, true
  60. }
  61. case 'n', 'N':
  62. if commonPrefixLenIgnoreCase(s, "nan") == 3 {
  63. return math.NaN(), 3, true
  64. }
  65. }
  66. return 0, 0, false
  67. }
  68. func (b *decimal) set(s string) (ok bool) {
  69. i := 0
  70. b.neg = false
  71. b.trunc = false
  72. // optional sign
  73. if i >= len(s) {
  74. return
  75. }
  76. switch {
  77. case s[i] == '+':
  78. i++
  79. case s[i] == '-':
  80. b.neg = true
  81. i++
  82. }
  83. // digits
  84. sawdot := false
  85. sawdigits := false
  86. for ; i < len(s); i++ {
  87. switch {
  88. case s[i] == '_':
  89. // readFloat already checked underscores
  90. continue
  91. case s[i] == '.':
  92. if sawdot {
  93. return
  94. }
  95. sawdot = true
  96. b.dp = b.nd
  97. continue
  98. case '0' <= s[i] && s[i] <= '9':
  99. sawdigits = true
  100. if s[i] == '0' && b.nd == 0 { // ignore leading zeros
  101. b.dp--
  102. continue
  103. }
  104. if b.nd < len(b.d) {
  105. b.d[b.nd] = s[i]
  106. b.nd++
  107. } else if s[i] != '0' {
  108. b.trunc = true
  109. }
  110. continue
  111. }
  112. break
  113. }
  114. if !sawdigits {
  115. return
  116. }
  117. if !sawdot {
  118. b.dp = b.nd
  119. }
  120. // optional exponent moves decimal point.
  121. // if we read a very large, very long number,
  122. // just be sure to move the decimal point by
  123. // a lot (say, 100000). it doesn't matter if it's
  124. // not the exact number.
  125. if i < len(s) && lower(s[i]) == 'e' {
  126. i++
  127. if i >= len(s) {
  128. return
  129. }
  130. esign := 1
  131. if s[i] == '+' {
  132. i++
  133. } else if s[i] == '-' {
  134. i++
  135. esign = -1
  136. }
  137. if i >= len(s) || s[i] < '0' || s[i] > '9' {
  138. return
  139. }
  140. e := 0
  141. for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
  142. if s[i] == '_' {
  143. // readFloat already checked underscores
  144. continue
  145. }
  146. if e < 10000 {
  147. e = e*10 + int(s[i]) - '0'
  148. }
  149. }
  150. b.dp += e * esign
  151. }
  152. if i != len(s) {
  153. return
  154. }
  155. ok = true
  156. return
  157. }
  158. // readFloat reads a decimal or hexadecimal mantissa and exponent from a float
  159. // string representation in s; the number may be followed by other characters.
  160. // readFloat reports the number of bytes consumed (i), and whether the number
  161. // is valid (ok).
  162. func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) {
  163. underscores := false
  164. // optional sign
  165. if i >= len(s) {
  166. return
  167. }
  168. switch {
  169. case s[i] == '+':
  170. i++
  171. case s[i] == '-':
  172. neg = true
  173. i++
  174. }
  175. // digits
  176. base := uint64(10)
  177. maxMantDigits := 19 // 10^19 fits in uint64
  178. expChar := byte('e')
  179. if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
  180. base = 16
  181. maxMantDigits = 16 // 16^16 fits in uint64
  182. i += 2
  183. expChar = 'p'
  184. hex = true
  185. }
  186. sawdot := false
  187. sawdigits := false
  188. nd := 0
  189. ndMant := 0
  190. dp := 0
  191. loop:
  192. for ; i < len(s); i++ {
  193. switch c := s[i]; true {
  194. case c == '_':
  195. underscores = true
  196. continue
  197. case c == '.':
  198. if sawdot {
  199. break loop
  200. }
  201. sawdot = true
  202. dp = nd
  203. continue
  204. case '0' <= c && c <= '9':
  205. sawdigits = true
  206. if c == '0' && nd == 0 { // ignore leading zeros
  207. dp--
  208. continue
  209. }
  210. nd++
  211. if ndMant < maxMantDigits {
  212. mantissa *= base
  213. mantissa += uint64(c - '0')
  214. ndMant++
  215. } else if c != '0' {
  216. trunc = true
  217. }
  218. continue
  219. case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
  220. sawdigits = true
  221. nd++
  222. if ndMant < maxMantDigits {
  223. mantissa *= 16
  224. mantissa += uint64(lower(c) - 'a' + 10)
  225. ndMant++
  226. } else {
  227. trunc = true
  228. }
  229. continue
  230. }
  231. break
  232. }
  233. if !sawdigits {
  234. return
  235. }
  236. if !sawdot {
  237. dp = nd
  238. }
  239. if base == 16 {
  240. dp *= 4
  241. ndMant *= 4
  242. }
  243. // optional exponent moves decimal point.
  244. // if we read a very large, very long number,
  245. // just be sure to move the decimal point by
  246. // a lot (say, 100000). it doesn't matter if it's
  247. // not the exact number.
  248. if i < len(s) && lower(s[i]) == expChar {
  249. i++
  250. if i >= len(s) {
  251. return
  252. }
  253. esign := 1
  254. if s[i] == '+' {
  255. i++
  256. } else if s[i] == '-' {
  257. i++
  258. esign = -1
  259. }
  260. if i >= len(s) || s[i] < '0' || s[i] > '9' {
  261. return
  262. }
  263. e := 0
  264. for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
  265. if s[i] == '_' {
  266. underscores = true
  267. continue
  268. }
  269. if e < 10000 {
  270. e = e*10 + int(s[i]) - '0'
  271. }
  272. }
  273. dp += e * esign
  274. } else if base == 16 {
  275. // Must have exponent.
  276. return
  277. }
  278. if mantissa != 0 {
  279. exp = dp - ndMant
  280. }
  281. if underscores && !underscoreOK(s[:i]) {
  282. return
  283. }
  284. ok = true
  285. return
  286. }
  // powtab maps a decimal power of ten to a binary power of two: floatBits
  // indexes it by the magnitude of the decimal point position to choose how
  // many bits to shift by, and uses a fixed step for larger magnitudes.
  var powtab = []int{
  	1, 3, 6, 9, 13, 16, 19, 23, 26,
  }
  289. func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
  290. var exp int
  291. var mant uint64
  292. // Zero is always a special case.
  293. if d.nd == 0 {
  294. mant = 0
  295. exp = flt.bias
  296. goto out
  297. }
  298. // Obvious overflow/underflow.
  299. // These bounds are for 64-bit floats.
  300. // Will have to change if we want to support 80-bit floats in the future.
  301. if d.dp > 310 {
  302. goto overflow
  303. }
  304. if d.dp < -330 {
  305. // zero
  306. mant = 0
  307. exp = flt.bias
  308. goto out
  309. }
  310. // Scale by powers of two until in range [0.5, 1.0)
  311. exp = 0
  312. for d.dp > 0 {
  313. var n int
  314. if d.dp >= len(powtab) {
  315. n = 27
  316. } else {
  317. n = powtab[d.dp]
  318. }
  319. d.Shift(-n)
  320. exp += n
  321. }
  322. for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
  323. var n int
  324. if -d.dp >= len(powtab) {
  325. n = 27
  326. } else {
  327. n = powtab[-d.dp]
  328. }
  329. d.Shift(n)
  330. exp -= n
  331. }
  332. // Our range is [0.5,1) but floating point range is [1,2).
  333. exp--
  334. // Minimum representable exponent is flt.bias+1.
  335. // If the exponent is smaller, move it up and
  336. // adjust d accordingly.
  337. if exp < flt.bias+1 {
  338. n := flt.bias + 1 - exp
  339. d.Shift(-n)
  340. exp += n
  341. }
  342. if exp-flt.bias >= 1<<flt.expbits-1 {
  343. goto overflow
  344. }
  345. // Extract 1+flt.mantbits bits.
  346. d.Shift(int(1 + flt.mantbits))
  347. mant = d.RoundedInteger()
  348. // Rounding might have added a bit; shift down.
  349. if mant == 2<<flt.mantbits {
  350. mant >>= 1
  351. exp++
  352. if exp-flt.bias >= 1<<flt.expbits-1 {
  353. goto overflow
  354. }
  355. }
  356. // Denormalized?
  357. if mant&(1<<flt.mantbits) == 0 {
  358. exp = flt.bias
  359. }
  360. goto out
  361. overflow:
  362. // ±Inf
  363. mant = 0
  364. exp = 1<<flt.expbits - 1 + flt.bias
  365. overflow = true
  366. out:
  367. // Assemble bits.
  368. bits := mant & (uint64(1)<<flt.mantbits - 1)
  369. bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
  370. if d.neg {
  371. bits |= 1 << flt.mantbits << flt.expbits
  372. }
  373. return bits, overflow
  374. }
  // Exact powers of 10, indexed by exponent, for each floating-point type.
  var (
  	float64pow10 = []float64{
  		1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  		1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
  		1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
  	}
  	float32pow10 = []float32{
  		1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
  	}
  )
  382. // If possible to convert decimal representation to 64-bit float f exactly,
  383. // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
  384. // Three common cases:
  385. // value is exact integer
  386. // value is exact integer * exact power of ten
  387. // value is exact integer / exact power of ten
  388. // These all produce potentially inexact but correctly rounded answers.
  389. func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
  390. if mantissa>>float64info.mantbits != 0 {
  391. return
  392. }
  393. // gccgo gets this wrong on 32-bit i386 when not using -msse.
  394. // See TestRoundTrip in atof_test.go for a test case.
  395. if runtime.GOARCH == "386" {
  396. return
  397. }
  398. f = float64(mantissa)
  399. if neg {
  400. f = -f
  401. }
  402. switch {
  403. case exp == 0:
  404. // an integer.
  405. return f, true
  406. // Exact integers are <= 10^15.
  407. // Exact powers of ten are <= 10^22.
  408. case exp > 0 && exp <= 15+22: // int * 10^k
  409. // If exponent is big but number of digits is not,
  410. // can move a few zeros into the integer part.
  411. if exp > 22 {
  412. f *= float64pow10[exp-22]
  413. exp = 22
  414. }
  415. if f > 1e15 || f < -1e15 {
  416. // the exponent was really too large.
  417. return
  418. }
  419. return f * float64pow10[exp], true
  420. case exp < 0 && exp >= -22: // int / 10^k
  421. return f / float64pow10[-exp], true
  422. }
  423. return
  424. }
  425. // If possible to compute mantissa*10^exp to 32-bit float f exactly,
  426. // entirely in floating-point math, do so, avoiding the machinery above.
  427. func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
  428. if mantissa>>float32info.mantbits != 0 {
  429. return
  430. }
  431. f = float32(mantissa)
  432. if neg {
  433. f = -f
  434. }
  435. switch {
  436. case exp == 0:
  437. return f, true
  438. // Exact integers are <= 10^7.
  439. // Exact powers of ten are <= 10^10.
  440. case exp > 0 && exp <= 7+10: // int * 10^k
  441. // If exponent is big but number of digits is not,
  442. // can move a few zeros into the integer part.
  443. if exp > 10 {
  444. f *= float32pow10[exp-10]
  445. exp = 10
  446. }
  447. if f > 1e7 || f < -1e7 {
  448. // the exponent was really too large.
  449. return
  450. }
  451. return f * float32pow10[exp], true
  452. case exp < 0 && exp >= -10: // int / 10^k
  453. return f / float32pow10[-exp], true
  454. }
  455. return
  456. }
  457. // atofHex converts the hex floating-point string s
  458. // to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
  459. // and returns it as a float64.
  460. // The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
  461. // If trunc is true, trailing non-zero bits have been omitted from the mantissa.
  462. func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
  463. maxExp := 1<<flt.expbits + flt.bias - 2
  464. minExp := flt.bias + 1
  465. exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
  466. // Shift mantissa and exponent to bring representation into float range.
  467. // Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
  468. // For rounding, we need two more, where the bottom bit represents
  469. // whether that bit or any later bit was non-zero.
  470. // (If the mantissa has already lost non-zero bits, trunc is true,
  471. // and we OR in a 1 below after shifting left appropriately.)
  472. for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
  473. mantissa <<= 1
  474. exp--
  475. }
  476. if trunc {
  477. mantissa |= 1
  478. }
  479. for mantissa>>(1+flt.mantbits+2) != 0 {
  480. mantissa = mantissa>>1 | mantissa&1
  481. exp++
  482. }
  483. // If exponent is too negative,
  484. // denormalize in hopes of making it representable.
  485. // (The -2 is for the rounding bits.)
  486. for mantissa > 1 && exp < minExp-2 {
  487. mantissa = mantissa>>1 | mantissa&1
  488. exp++
  489. }
  490. // Round using two bottom bits.
  491. round := mantissa & 3
  492. mantissa >>= 2
  493. round |= mantissa & 1 // round to even (round up if mantissa is odd)
  494. exp += 2
  495. if round == 3 {
  496. mantissa++
  497. if mantissa == 1<<(1+flt.mantbits) {
  498. mantissa >>= 1
  499. exp++
  500. }
  501. }
  502. if mantissa>>flt.mantbits == 0 { // Denormal or zero.
  503. exp = flt.bias
  504. }
  505. var err error
  506. if exp > maxExp { // infinity and range error
  507. mantissa = 1 << flt.mantbits
  508. exp = maxExp + 1
  509. err = rangeError(fnParseFloat, s)
  510. }
  511. bits := mantissa & (1<<flt.mantbits - 1)
  512. bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
  513. if neg {
  514. bits |= 1 << flt.mantbits << flt.expbits
  515. }
  516. if flt == &float32info {
  517. return float64(math.Float32frombits(uint32(bits))), err
  518. }
  519. return math.Float64frombits(bits), err
  520. }
  // fnParseFloat is the function name passed to syntaxError and rangeError
  // when reporting float parsing failures.
  const (
  	fnParseFloat = "ParseFloat"
  )
  522. func atof32(s string) (f float32, n int, err error) {
  523. if val, n, ok := special(s); ok {
  524. return float32(val), n, nil
  525. }
  526. mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
  527. if !ok {
  528. return 0, n, syntaxError(fnParseFloat, s)
  529. }
  530. if hex {
  531. f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
  532. return float32(f), n, err
  533. }
  534. if optimize {
  535. // Try pure floating-point arithmetic conversion, and if that fails,
  536. // the Eisel-Lemire algorithm.
  537. if !trunc {
  538. if f, ok := atof32exact(mantissa, exp, neg); ok {
  539. return f, n, nil
  540. }
  541. }
  542. f, ok := eiselLemire32(mantissa, exp, neg)
  543. if ok {
  544. if !trunc {
  545. return f, n, nil
  546. }
  547. // Even if the mantissa was truncated, we may
  548. // have found the correct result. Confirm by
  549. // converting the upper mantissa bound.
  550. fUp, ok := eiselLemire32(mantissa+1, exp, neg)
  551. if ok && f == fUp {
  552. return f, n, nil
  553. }
  554. }
  555. }
  556. // Slow fallback.
  557. var d decimal
  558. if !d.set(s[:n]) {
  559. return 0, n, syntaxError(fnParseFloat, s)
  560. }
  561. b, ovf := d.floatBits(&float32info)
  562. f = math.Float32frombits(uint32(b))
  563. if ovf {
  564. err = rangeError(fnParseFloat, s)
  565. }
  566. return f, n, err
  567. }
  568. func atof64(s string) (f float64, n int, err error) {
  569. if val, n, ok := special(s); ok {
  570. return val, n, nil
  571. }
  572. mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
  573. if !ok {
  574. return 0, n, syntaxError(fnParseFloat, s)
  575. }
  576. if hex {
  577. f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
  578. return f, n, err
  579. }
  580. if optimize {
  581. // Try pure floating-point arithmetic conversion, and if that fails,
  582. // the Eisel-Lemire algorithm.
  583. if !trunc {
  584. if f, ok := atof64exact(mantissa, exp, neg); ok {
  585. return f, n, nil
  586. }
  587. }
  588. f, ok := eiselLemire64(mantissa, exp, neg)
  589. if ok {
  590. if !trunc {
  591. return f, n, nil
  592. }
  593. // Even if the mantissa was truncated, we may
  594. // have found the correct result. Confirm by
  595. // converting the upper mantissa bound.
  596. fUp, ok := eiselLemire64(mantissa+1, exp, neg)
  597. if ok && f == fUp {
  598. return f, n, nil
  599. }
  600. }
  601. }
  602. // Slow fallback.
  603. var d decimal
  604. if !d.set(s[:n]) {
  605. return 0, n, syntaxError(fnParseFloat, s)
  606. }
  607. b, ovf := d.floatBits(&float64info)
  608. f = math.Float64frombits(b)
  609. if ovf {
  610. err = rangeError(fnParseFloat, s)
  611. }
  612. return f, n, err
  613. }
  614. // ParseFloat converts the string s to a floating-point number
  615. // with the precision specified by bitSize: 32 for float32, or 64 for float64.
  616. // When bitSize=32, the result still has type float64, but it will be
  617. // convertible to float32 without changing its value.
  618. //
  619. // ParseFloat accepts decimal and hexadecimal floating-point number syntax.
  620. // If s is well-formed and near a valid floating-point number,
  621. // ParseFloat returns the nearest floating-point number rounded
  622. // using IEEE754 unbiased rounding.
  623. // (Parsing a hexadecimal floating-point value only rounds when
  624. // there are more bits in the hexadecimal representation than
  625. // will fit in the mantissa.)
  626. //
  627. // The errors that ParseFloat returns have concrete type *NumError
  628. // and include err.Num = s.
  629. //
  630. // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
  631. //
  632. // If s is syntactically well-formed but is more than 1/2 ULP
  633. // away from the largest floating point number of the given size,
  634. // ParseFloat returns f = ±Inf, err.Err = ErrRange.
  635. //
  636. // ParseFloat recognizes the strings "NaN", and the (possibly signed) strings "Inf" and "Infinity"
  637. // as their respective special floating point values. It ignores case when matching.
  638. func ParseFloat(s string, bitSize int) (float64, error) {
  639. f, n, err := parseFloatPrefix(s, bitSize)
  640. if n != len(s) && (err == nil || err.(*NumError).Err != ErrSyntax) {
  641. return 0, syntaxError(fnParseFloat, s)
  642. }
  643. return f, err
  644. }
  645. func parseFloatPrefix(s string, bitSize int) (float64, int, error) {
  646. if bitSize == 32 {
  647. f, n, err := atof32(s)
  648. return float64(f), n, err
  649. }
  650. return atof64(s)
  651. }