numeric_impl.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. // -*- C++ -*-
  2. //===-- numeric_impl.h ----------------------------------------------------===//
  3. //
  4. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5. // See https://llvm.org/LICENSE.txt for license information.
  6. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #ifndef _PSTL_NUMERIC_IMPL_H
  10. #define _PSTL_NUMERIC_IMPL_H
  11. #include <iterator>
  12. #include <type_traits>
  13. #include <numeric>
  14. #include "parallel_backend.h"
  15. #include "pstl_config.h"
  16. #include "execution_impl.h"
  17. #include "unseq_backend_simd.h"
  18. #include "algorithm_fwd.h"
  19. namespace __pstl
  20. {
  21. namespace __internal
  22. {
  23. //------------------------------------------------------------------------
  24. // transform_reduce (version with two binary functions, according to draft N4659)
  25. //------------------------------------------------------------------------
  26. template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
  27. _Tp
  28. __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init,
  29. _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
  30. /*is_vector=*/std::false_type) noexcept
  31. {
  32. return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2);
  33. }
  34. template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
  35. _Tp
  36. __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init,
  37. _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
  38. /*is_vector=*/std::true_type) noexcept
  39. {
  40. typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
  41. return __unseq_backend::__simd_transform_reduce(
  42. __last1 - __first1, __init, __binary_op1,
  43. [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); });
  44. }
  45. template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1,
  46. class _BinaryOperation2, class _IsVector>
  47. _Tp
  48. __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
  49. _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
  50. _BinaryOperation2 __binary_op2, _IsVector __is_vector,
  51. /*is_parallel=*/std::false_type) noexcept
  52. {
  53. return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector);
  54. }
  55. template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp,
  56. class _BinaryOperation1, class _BinaryOperation2, class _IsVector>
  57. _Tp
  58. __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
  59. _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
  60. _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/std::true_type)
  61. {
  62. return __internal::__except_handler([&]() {
  63. return __par_backend::__parallel_transform_reduce(
  64. std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
  65. [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable {
  66. return __binary_op2(*__i, *(__first2 + (__i - __first1)));
  67. },
  68. __init,
  69. __binary_op1, // Combine
  70. [__first1, __first2, __binary_op1, __binary_op2,
  71. __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp {
  72. return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1,
  73. __binary_op2, __is_vector);
  74. });
  75. });
  76. }
  77. //------------------------------------------------------------------------
  78. // transform_reduce (version with unary and binary functions)
  79. //------------------------------------------------------------------------
  80. template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
  81. _Tp
  82. __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
  83. _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept
  84. {
  85. return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op);
  86. }
  87. template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation>
  88. _Tp
  89. __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
  90. _UnaryOperation __unary_op, /*is_vector=*/std::true_type) noexcept
  91. {
  92. typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType;
  93. return __unseq_backend::__simd_transform_reduce(
  94. __last - __first, __init, __binary_op,
  95. [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); });
  96. }
  97. template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation,
  98. class _IsVector>
  99. _Tp
  100. __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
  101. _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector,
  102. /*is_parallel=*/std::false_type) noexcept
  103. {
  104. return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector);
  105. }
  106. template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation,
  107. class _IsVector>
  108. _Tp
  109. __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
  110. _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector,
  111. /*is_parallel=*/std::true_type)
  112. {
  113. return __internal::__except_handler([&]() {
  114. return __par_backend::__parallel_transform_reduce(
  115. std::forward<_ExecutionPolicy>(__exec), __first, __last,
  116. [__unary_op](_ForwardIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op,
  117. [__unary_op, __binary_op, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _Tp __init) {
  118. return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector);
  119. });
  120. });
  121. }
  122. //------------------------------------------------------------------------
  123. // transform_exclusive_scan
  124. //
  125. // walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
  126. //------------------------------------------------------------------------
  127. // Exclusive form
  128. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
  129. std::pair<_OutputIterator, _Tp>
  130. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  131. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  132. /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept
  133. {
  134. for (; __first != __last; ++__first, ++__result)
  135. {
  136. *__result = __init;
  137. _PSTL_PRAGMA_FORCEINLINE
  138. __init = __binary_op(__init, __unary_op(*__first));
  139. }
  140. return std::make_pair(__result, __init);
  141. }
  142. // Inclusive form
  143. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
  144. std::pair<_OutputIterator, _Tp>
  145. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  146. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  147. /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept
  148. {
  149. for (; __first != __last; ++__first, ++__result)
  150. {
  151. _PSTL_PRAGMA_FORCEINLINE
  152. __init = __binary_op(__init, __unary_op(*__first));
  153. *__result = __init;
  154. }
  155. return std::make_pair(__result, __init);
  156. }
  157. // type is arithmetic and binary operation is a user defined operation.
  158. template <typename _Tp, typename _BinaryOperation>
  159. using is_arithmetic_udop = std::integral_constant<bool, std::is_arithmetic<_Tp>::value &&
  160. !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>;
  161. // [restriction] - T shall be DefaultConstructible.
  162. // [violation] - default ctor of T shall set the identity value for binary_op.
  163. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
  164. class _Inclusive>
  165. typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
  166. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  167. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
  168. /*is_vector=*/std::true_type) noexcept
  169. {
  170. #if (_PSTL_UDS_PRESENT)
  171. return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op,
  172. _Inclusive());
  173. #else
  174. // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value
  175. return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
  176. /*is_vector=*/std::false_type());
  177. #endif
  178. }
  179. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
  180. class _Inclusive>
  181. typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
  182. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  183. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
  184. /*is_vector=*/std::true_type) noexcept
  185. {
  186. return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
  187. /*is_vector=*/std::false_type());
  188. }
  189. template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp,
  190. class _BinaryOperation, class _Inclusive, class _IsVector>
  191. _OutputIterator
  192. __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
  193. _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  194. _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
  195. {
  196. return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
  197. __is_vector)
  198. .first;
  199. }
  200. template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp,
  201. class _BinaryOperation, class _Inclusive, class _IsVector>
  202. typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type
  203. __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
  204. _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  205. _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type)
  206. {
  207. typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
  208. return __internal::__except_handler([&]() {
  209. __par_backend::__parallel_transform_scan(
  210. std::forward<_ExecutionPolicy>(__exec), __last - __first,
  211. [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init,
  212. __binary_op,
  213. [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) {
  214. // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan.
  215. return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op,
  216. __unary_op,
  217. /*__is_vector*/ std::false_type());
  218. },
  219. [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j,
  220. _Tp __init) {
  221. return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op,
  222. __init, __binary_op, _Inclusive(), __is_vector)
  223. .second;
  224. });
  225. return __result + (__last - __first);
  226. });
  227. }
  228. template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp,
  229. class _BinaryOperation, class _Inclusive, class _IsVector>
  230. typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type
  231. __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
  232. _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  233. _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::true_type)
  234. {
  235. typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
  236. _DifferenceType __n = __last - __first;
  237. if (__n <= 0)
  238. {
  239. return __result;
  240. }
  241. return __internal::__except_handler([&]() {
  242. __par_backend::__parallel_strict_scan(
  243. std::forward<_ExecutionPolicy>(__exec), __n, __init,
  244. [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) {
  245. return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i,
  246. __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector)
  247. .second;
  248. },
  249. __binary_op,
  250. [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) {
  251. return *(std::transform(__result + __i, __result + __i + __len, __result + __i,
  252. [&__initial, &__binary_op](const _Tp& __x) {
  253. _PSTL_PRAGMA_FORCEINLINE
  254. return __binary_op(__initial, __x);
  255. }) -
  256. 1);
  257. },
  258. [](_Tp) {});
  259. return __result + (__last - __first);
  260. });
  261. }
  262. //------------------------------------------------------------------------
  263. // adjacent_difference
  264. //------------------------------------------------------------------------
  265. template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
  266. _OutputIterator
  267. __brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first,
  268. _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept
  269. {
  270. return std::adjacent_difference(__first, __last, __d_first, __op);
  271. }
  272. template <class _ForwardIterator1, class _ForwardIterator2, class BinaryOperation>
  273. _ForwardIterator2
  274. __brick_adjacent_difference(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first,
  275. BinaryOperation __op, /*is_vector=*/std::true_type) noexcept
  276. {
  277. _PSTL_ASSERT(__first != __last);
  278. typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1;
  279. typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2;
  280. auto __n = __last - __first;
  281. *__d_first = *__first;
  282. return __unseq_backend::__simd_walk_3(
  283. __first + 1, __n - 1, __first, __d_first + 1,
  284. [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); });
  285. }
  286. template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation,
  287. class _IsVector>
  288. _OutputIterator
  289. __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
  290. _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector,
  291. /*is_parallel*/ std::false_type) noexcept
  292. {
  293. return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector);
  294. }
  295. template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation,
  296. class _IsVector>
  297. _ForwardIterator2
  298. __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
  299. _ForwardIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector,
  300. /*is_parallel=*/std::true_type)
  301. {
  302. _PSTL_ASSERT(__first != __last);
  303. typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1;
  304. typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2;
  305. *__d_first = *__first;
  306. __par_backend::__parallel_for(
  307. std::forward<_ExecutionPolicy>(__exec), __first, __last - 1,
  308. [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) {
  309. _ForwardIterator2 __d_b = __d_first + (__b - __first);
  310. __internal::__brick_walk3(
  311. __b, __e, __b + 1, __d_b + 1,
  312. [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); },
  313. __is_vector);
  314. });
  315. return __d_first + (__last - __first);
  316. }
  317. } // namespace __internal
  318. } // namespace __pstl
  319. #endif /* _PSTL_NUMERIC_IMPL_H */