arith.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // This file provides Go implementations of elementary multi-precision
  5. // arithmetic operations on word vectors. These have the suffix _g.
  6. // These are needed for platforms without assembly implementations of these routines.
  7. // This file also contains elementary operations that can be implemented
  8. // sufficiently efficiently in Go.
  9. package big
  10. import "math/bits"
  11. // A Word represents a single digit of a multi-precision unsigned integer.
  12. type Word uint
  13. const (
  14. _S = _W / 8 // word size in bytes
  15. _W = bits.UintSize // word size in bits
  16. _B = 1 << _W // digit base
  17. _M = _B - 1 // digit mask
  18. )
  19. // Many of the loops in this file are of the form
  20. // for i := 0; i < len(z) && i < len(x) && i < len(y); i++
  21. // i < len(z) is the real condition.
  22. // However, checking i < len(x) && i < len(y) as well is faster than
  23. // having the compiler do a bounds check in the body of the loop;
  24. // remarkably it is even faster than hoisting the bounds check
  25. // out of the loop, by doing something like
  26. // _, _ = x[len(z)-1], y[len(z)-1]
  27. // There are other ways to hoist the bounds check out of the loop,
  28. // but the compiler's BCE isn't powerful enough for them (yet?).
  29. // See the discussion in CL 164966.
  30. // ----------------------------------------------------------------------------
  31. // Elementary operations on words
  32. //
  33. // These operations are used by the vector operations below.
  34. // z1<<_W + z0 = x*y
  35. func mulWW_g(x, y Word) (z1, z0 Word) {
  36. hi, lo := bits.Mul(uint(x), uint(y))
  37. return Word(hi), Word(lo)
  38. }
  39. // z1<<_W + z0 = x*y + c
  40. func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
  41. hi, lo := bits.Mul(uint(x), uint(y))
  42. var cc uint
  43. lo, cc = bits.Add(lo, uint(c), 0)
  44. return Word(hi + cc), Word(lo)
  45. }
  46. // nlz returns the number of leading zeros in x.
  47. // Wraps bits.LeadingZeros call for convenience.
  48. func nlz(x Word) uint {
  49. return uint(bits.LeadingZeros(uint(x)))
  50. }
  51. // The resulting carry c is either 0 or 1.
  52. func addVV_g(z, x, y []Word) (c Word) {
  53. // The comment near the top of this file discusses this for loop condition.
  54. for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
  55. zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
  56. z[i] = Word(zi)
  57. c = Word(cc)
  58. }
  59. return
  60. }
  61. // The resulting carry c is either 0 or 1.
  62. func subVV_g(z, x, y []Word) (c Word) {
  63. // The comment near the top of this file discusses this for loop condition.
  64. for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
  65. zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
  66. z[i] = Word(zi)
  67. c = Word(cc)
  68. }
  69. return
  70. }
  71. // The resulting carry c is either 0 or 1.
  72. func addVW_g(z, x []Word, y Word) (c Word) {
  73. c = y
  74. // The comment near the top of this file discusses this for loop condition.
  75. for i := 0; i < len(z) && i < len(x); i++ {
  76. zi, cc := bits.Add(uint(x[i]), uint(c), 0)
  77. z[i] = Word(zi)
  78. c = Word(cc)
  79. }
  80. return
  81. }
  82. // addVWlarge is addVW, but intended for large z.
  83. // The only difference is that we check on every iteration
  84. // whether we are done with carries,
  85. // and if so, switch to a much faster copy instead.
  86. // This is only a good idea for large z,
  87. // because the overhead of the check and the function call
  88. // outweigh the benefits when z is small.
  89. func addVWlarge(z, x []Word, y Word) (c Word) {
  90. c = y
  91. // The comment near the top of this file discusses this for loop condition.
  92. for i := 0; i < len(z) && i < len(x); i++ {
  93. if c == 0 {
  94. copy(z[i:], x[i:])
  95. return
  96. }
  97. zi, cc := bits.Add(uint(x[i]), uint(c), 0)
  98. z[i] = Word(zi)
  99. c = Word(cc)
  100. }
  101. return
  102. }
  103. func subVW_g(z, x []Word, y Word) (c Word) {
  104. c = y
  105. // The comment near the top of this file discusses this for loop condition.
  106. for i := 0; i < len(z) && i < len(x); i++ {
  107. zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
  108. z[i] = Word(zi)
  109. c = Word(cc)
  110. }
  111. return
  112. }
  113. // subVWlarge is to subVW as addVWlarge is to addVW.
  114. func subVWlarge(z, x []Word, y Word) (c Word) {
  115. c = y
  116. // The comment near the top of this file discusses this for loop condition.
  117. for i := 0; i < len(z) && i < len(x); i++ {
  118. if c == 0 {
  119. copy(z[i:], x[i:])
  120. return
  121. }
  122. zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
  123. z[i] = Word(zi)
  124. c = Word(cc)
  125. }
  126. return
  127. }
  128. func shlVU_g(z, x []Word, s uint) (c Word) {
  129. if s == 0 {
  130. copy(z, x)
  131. return
  132. }
  133. if len(z) == 0 {
  134. return
  135. }
  136. s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
  137. ŝ := _W - s
  138. ŝ &= _W - 1 // ditto
  139. c = x[len(z)-1] >> ŝ
  140. for i := len(z) - 1; i > 0; i-- {
  141. z[i] = x[i]<<s | x[i-1]>>ŝ
  142. }
  143. z[0] = x[0] << s
  144. return
  145. }
  146. func shrVU_g(z, x []Word, s uint) (c Word) {
  147. if s == 0 {
  148. copy(z, x)
  149. return
  150. }
  151. if len(z) == 0 {
  152. return
  153. }
  154. if len(x) != len(z) {
  155. // This is an invariant guaranteed by the caller.
  156. panic("len(x) != len(z)")
  157. }
  158. s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
  159. ŝ := _W - s
  160. ŝ &= _W - 1 // ditto
  161. c = x[0] << ŝ
  162. for i := 1; i < len(z); i++ {
  163. z[i-1] = x[i-1]>>s | x[i]<<ŝ
  164. }
  165. z[len(z)-1] = x[len(z)-1] >> s
  166. return
  167. }
  168. func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
  169. c = r
  170. // The comment near the top of this file discusses this for loop condition.
  171. for i := 0; i < len(z) && i < len(x); i++ {
  172. c, z[i] = mulAddWWW_g(x[i], y, c)
  173. }
  174. return
  175. }
  176. func addMulVVW_g(z, x []Word, y Word) (c Word) {
  177. // The comment near the top of this file discusses this for loop condition.
  178. for i := 0; i < len(z) && i < len(x); i++ {
  179. z1, z0 := mulAddWWW_g(x[i], y, z[i])
  180. lo, cc := bits.Add(uint(z0), uint(c), 0)
  181. c, z[i] = Word(cc), Word(lo)
  182. c += z1
  183. }
  184. return
  185. }
  186. // q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y.
  187. // An approximate reciprocal with a reference to "Improved Division by Invariant Integers
  188. // (IEEE Transactions on Computers, 11 Jun. 2010)"
  189. func divWW(x1, x0, y, m Word) (q, r Word) {
  190. s := nlz(y)
  191. if s != 0 {
  192. x1 = x1<<s | x0>>(_W-s)
  193. x0 <<= s
  194. y <<= s
  195. }
  196. d := uint(y)
  197. // We know that
  198. // m = ⎣(B^2-1)/d⎦-B
  199. // ⎣(B^2-1)/d⎦ = m+B
  200. // (B^2-1)/d = m+B+delta1 0 <= delta1 <= (d-1)/d
  201. // B^2/d = m+B+delta2 0 <= delta2 <= 1
  202. // The quotient we're trying to compute is
  203. // quotient = ⎣(x1*B+x0)/d⎦
  204. // = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦
  205. // = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦
  206. // = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦
  207. // The latter two terms of this three-term sum are between 0 and 1.
  208. // So we can compute just the first term, and we will be low by at most 2.
  209. t1, t0 := bits.Mul(uint(m), uint(x1))
  210. _, c := bits.Add(t0, uint(x0), 0)
  211. t1, _ = bits.Add(t1, uint(x1), c)
  212. // The quotient is either t1, t1+1, or t1+2.
  213. // We'll try t1 and adjust if needed.
  214. qq := t1
  215. // compute remainder r=x-d*q.
  216. dq1, dq0 := bits.Mul(d, qq)
  217. r0, b := bits.Sub(uint(x0), dq0, 0)
  218. r1, _ := bits.Sub(uint(x1), dq1, b)
  219. // The remainder we just computed is bounded above by B+d:
  220. // r = x1*B + x0 - d*q.
  221. // = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦
  222. // = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha) 0 <= alpha < 1
  223. // = x1*B + x0 - x1*d/B*m - x1*d - x0*d/B + d*alpha
  224. // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha
  225. // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha
  226. // = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta) - x1*d - x0*d/B + d*alpha 0 <= beta < 1
  227. // = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha
  228. // = x0 + x1/B + x1*d/B*beta - x0*d/B + d*alpha
  229. // = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha
  230. // < B*(1-d/B) + d*B/B + d because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1
  231. // = B - d + d + d
  232. // = B+d
  233. // So r1 can only be 0 or 1. If r1 is 1, then we know q was too small.
  234. // Add 1 to q and subtract d from r. That guarantees that r is <B, so
  235. // we no longer need to keep track of r1.
  236. if r1 != 0 {
  237. qq++
  238. r0 -= d
  239. }
  240. // If the remainder is still too large, increment q one more time.
  241. if r0 >= d {
  242. qq++
  243. r0 -= d
  244. }
  245. return Word(qq), Word(r0 >> s)
  246. }
  247. // reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
  248. func reciprocalWord(d1 Word) Word {
  249. u := uint(d1 << nlz(d1))
  250. x1 := ^u
  251. x0 := uint(_M)
  252. rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
  253. return Word(rec)
  254. }