scan-13.C 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. // { dg-require-effective-target size32plus }
  2. // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
  3. // { dg-additional-options "-msse2" { target sse2_runtime } }
  4. // { dg-additional-options "-mavx" { target avx_runtime } }
  5. // { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } }
  // Declaration of the C library abort routine; main calls it as soon as any
  // result check fails.
  6. extern "C" void abort ();
  // Thin wrapper around a single value of type T.  All four special member
  // functions are user-provided and defined out of line below.
  template <class T>
  struct S
  {
    inline S ();
    inline ~S ();
    inline S (const S &);
    inline S &operator= (const S &);

    T s;  // The wrapped value; public, so users read and update it directly.
  };

  // Default constructor: the wrapped value starts out as 0.
  template <class T>
  S<T>::S ()
    : s (0)
  {}

  // Destructor: there is nothing to release.
  template <class T>
  S<T>::~S ()
  {}

  // Copy constructor: takes over the value held by SRC.
  template <class T>
  S<T>::S (const S &src)
  {
    this->s = src.s;
  }

  // Copy assignment: overwrites the wrapped value with the one held by SRC
  // and returns a reference to the assigned-to object.
  template <class T>
  S<T> &
  S<T>::operator= (const S &src)
  {
    this->s = src.s;
    return *this;
  }
  35. template <typename T>
  36. static inline void
  37. ini (S<T> &x)
  38. {
  39. x.s = 0;
  40. }
  // File-scope state shared by the kernels: R is the reduction variable used
  // by foo and baz, A is the input array and B receives the scan results.
  41. S<int> r, a[1024], b[1024];
  // User-defined reduction for the `+` operator on S<int>: partial results are
  // combined by adding the wrapped values.  No initializer clause is given, so
  // private copies presumably get default initialization (S () sets s to 0) —
  // confirm against the OpenMP specification.
  42. #pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
  // Same combiner under the identifier `plus`, but private copies are
  // explicitly reset through ini ().
  43. #pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
  // Exclusive scan over 1024 elements using the global R as the inscan
  // reduction variable and the `+` user-defined reduction.
  //   a - input array (this parameter hides the global A inside the function)
  //   b - output array (hides the global B); b[i] is assigned the value of R
  //       as seen before the scan directive in iteration i
  // The `omp for simd` directive is orphaned: it contains no parallel region
  // of its own, and main calls foo from inside `#pragma omp parallel`.
  // noipa keeps GCC from applying interprocedural optimizations to this
  // function.
  44. template <typename T>
  45. __attribute__((noipa)) void
  46. foo (S<T> *a, S<T> *b)
  47. {
  // NOTE(review): the constant-false if clause presumably applies to the simd
  // part of the combined construct — confirm against the OpenMP specification.
  48. #pragma omp for simd if (0) reduction (inscan, +:r)
  49. for (int i = 0; i < 1024; i++)
  50. {
  // Before the scan directive: store the current value of r.
  51. b[i] = r;
  52. #pragma omp scan exclusive(r)
  // After the scan directive: add this iteration's input to r.
  53. r.s += a[i].s;
  54. }
  55. }
  // Exclusive scan of 2 * a[i].s over the global arrays, accumulated in a
  // local S<T> with the `plus` user-defined reduction.  Opens its own parallel
  // region and nests an `omp for simd` loop inside it.
  // Takes no arguments; reads the global A and writes the global B.
  // Returns a copy of the local accumulator after the loop.
  // The assignment b[i] = s stores an S<T> into an S<int> element, and main
  // only instantiates bar<int>.
  // noipa keeps GCC from applying interprocedural optimizations to this
  // function.
  56. template <typename T>
  57. __attribute__((noipa)) S<T>
  58. bar (void)
  59. {
  // The S constructor sets s.s to 0.
  60. S<T> s;
  61. #pragma omp parallel
  62. #pragma omp for simd reduction (inscan, plus:s)
  63. for (int i = 0; i < 1024; i++)
  64. {
  // Before the scan directive: store the current value of s.
  65. b[i] = s;
  66. #pragma omp scan exclusive(s)
  // After the scan directive: add twice this iteration's input to s.
  67. s.s += 2 * a[i].s;
  68. }
  69. return S<T> (s);
  70. }
  // Non-template counterpart of foo that uses the combined
  // `parallel for simd` construct, so it creates its own parallel region.
  //   a - input array (this parameter hides the global A inside the function)
  //   b - output array (hides the global B); b[i] is assigned the value of R
  //       as seen before the scan directive in iteration i
  // Accumulates into the global R with the `+` user-defined reduction; main
  // resets r.s to 0 before calling.
  // noipa keeps GCC from applying interprocedural optimizations to this
  // function.
  71. __attribute__((noipa)) void
  72. baz (S<int> *a, S<int> *b)
  73. {
  74. #pragma omp parallel for simd reduction (inscan, +:r)
  75. for (int i = 0; i < 1024; i++)
  76. {
  // Before the scan directive: store the current value of r.
  77. b[i] = r;
  78. #pragma omp scan exclusive(r)
  // After the scan directive: add this iteration's input to r.
  79. r.s += a[i].s;
  80. }
  81. }
  // Non-template counterpart of bar that uses the combined
  // `parallel for simd` construct with a simdlen(1) clause.
  // Takes no arguments; reads the global A and writes the global B.
  // Accumulates 2 * a[i].s into a local S<int> with the `plus` user-defined
  // reduction and returns a copy of that accumulator.
  // noipa keeps GCC from applying interprocedural optimizations to this
  // function.
  82. __attribute__((noipa)) S<int>
  83. qux (void)
  84. {
  // The S constructor sets s.s to 0.
  85. S<int> s;
  86. #pragma omp parallel for simd simdlen(1) reduction (inscan, plus:s)
  87. for (int i = 0; i < 1024; i++)
  88. {
  // Before the scan directive: store the current value of s.
  89. b[i] = s;
  90. #pragma omp scan exclusive(s)
  // After the scan directive: add twice this iteration's input to s.
  91. s.s += 2 * a[i].s;
  92. }
  93. return S<int> (s);
  94. }
  // Test driver.  Fills A with 0..1023, runs foo, bar, baz and qux in turn,
  // and calls abort () if a final reduction value or any element of B differs
  // from a serially computed exclusive prefix sum.  Falls off the end of main
  // (exit status 0) when every check passes.
  95. int
  96. main ()
  97. {
  // Serial reference accumulator; the S constructor sets s.s to 0.
  98. S<int> s;
  99. for (int i = 0; i < 1024; ++i)
  100. {
  101. a[i].s = i;
  // -1 marks the element as not yet written by a kernel.
  102. b[i].s = -1;
  // Empty asm that claims to read and write i.
  // NOTE(review): presumably here to keep this setup loop from being
  // vectorized, so it does not affect the dump count — confirm.
  103. asm ("" : "+g" (i));
  104. }
  // foo only contains an orphaned worksharing loop, so the parallel region is
  // opened here.
  105. #pragma omp parallel
  106. foo (a, b);
  // 1024 * 1023 / 2 is the sum of 0..1023.
  107. if (r.s != 1024 * 1023 / 2)
  108. abort ();
  // b[i] must equal the sum of a[0..i-1].s.
  109. for (int i = 0; i < 1024; ++i)
  110. {
  111. if (b[i].s != s.s)
  112. abort ();
  113. else
  // Overwrite the verified element with 25 before the next kernel runs.
  114. b[i].s = 25;
  115. s.s += i;
  116. }
  // bar adds 2 * a[i].s, so its total is twice the sum of 0..1023.
  117. if (bar<int> ().s != 1024 * 1023)
  118. abort ();
  119. s.s = 0;
  120. for (int i = 0; i < 1024; ++i)
  121. {
  122. if (b[i].s != s.s)
  123. abort ();
  124. s.s += 2 * i;
  125. }
  // baz accumulates into the global r, so clear the total left by foo first.
  126. r.s = 0;
  127. baz (a, b);
  128. if (r.s != 1024 * 1023 / 2)
  129. abort ();
  130. s.s = 0;
  131. for (int i = 0; i < 1024; ++i)
  132. {
  133. if (b[i].s != s.s)
  134. abort ();
  135. else
  // Overwrite the verified element with 25 before the next kernel runs.
  136. b[i].s = 25;
  137. s.s += i;
  138. }
  // qux, like bar, adds 2 * a[i].s.
  139. if (qux ().s != 1024 * 1023)
  140. abort ();
  141. s.s = 0;
  142. for (int i = 0; i < 1024; ++i)
  143. {
  144. if (b[i].s != s.s)
  145. abort ();
  146. s.s += 2 * i;
  147. }
  148. }