scan-16.C 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. // { dg-require-effective-target size32plus }
  2. // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
  3. // { dg-additional-options "-msse2" { target sse2_runtime } }
  4. // { dg-additional-options "-mavx" { target avx_runtime } }
  5. // { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } }
  6. extern "C" void abort ();
  // Small class type wrapping a single int.  All four special member
  // functions are user-provided (declared inline here, defined out of line
  // below), so S is not a trivial type.
  struct S {
    int s;                             // the only data member
    inline S ();                       // sets s to 0
    inline ~S ();                      // empty body
    inline S (const S &);              // copies s from the argument
    inline S & operator= (const S &);  // copies s, returns *this
  };

  // Default constructor: s starts out as zero.
  S::S ()
  {
    s = 0;
  }

  // Destructor: nothing to release.
  S::~S ()
  {
  }

  // Copy constructor: take over the other object's s.
  S::S (const S &other) : s (other.s)
  {
  }

  // Copy assignment: overwrite s with the other object's s and return a
  // reference to the object assigned to.
  S &
  S::operator= (const S &other)
  {
    this->s = other.s;
    return *this;
  }
  30. static inline void
  31. ini (S &x)
  32. {
  33. x.s = 0;
  34. }
  35. S r, a[1024], b[1024];
  36. #pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
  37. #pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
  38. __attribute__((noipa)) void
  39. foo (S *a, S *b, S &r)
  40. {
  41. #pragma omp for simd simdlen (1) reduction (inscan, +:r)
  42. for (int i = 0; i < 1024; i++)
  43. {
  44. b[i] = r;
  45. #pragma omp scan exclusive(r)
  46. r.s += a[i].s;
  47. }
  48. }
  49. __attribute__((noipa)) S
  50. bar (void)
  51. {
  52. S s;
  53. #pragma omp parallel
  54. #pragma omp for simd if (0) reduction (inscan, plus:s)
  55. for (int i = 0; i < 1024; i++)
  56. {
  57. b[i] = s;
  58. #pragma omp scan exclusive(s)
  59. s.s += 2 * a[i].s;
  60. }
  61. return s;
  62. }
  63. __attribute__((noipa)) void
  64. baz (S *a, S *b, S &r)
  65. {
  66. #pragma omp parallel for simd reduction (inscan, +:r)
  67. for (int i = 0; i < 1024; i++)
  68. {
  69. b[i] = r;
  70. #pragma omp scan exclusive(r)
  71. r.s += a[i].s;
  72. }
  73. }
  74. __attribute__((noipa)) S
  75. qux (void)
  76. {
  77. S s;
  78. #pragma omp parallel for simd reduction (inscan, plus:s)
  79. for (int i = 0; i < 1024; i++)
  80. {
  81. b[i] = s;
  82. #pragma omp scan exclusive(s)
  83. s.s += 2 * a[i].s;
  84. }
  85. return s;
  86. }
  87. int
  88. main ()
  89. {
  90. S s;
  91. for (int i = 0; i < 1024; ++i)
  92. {
  93. a[i].s = i;
  94. b[i].s = -1;
  95. asm ("" : "+g" (i));
  96. }
  97. #pragma omp parallel
  98. foo (a, b, r);
  99. if (r.s != 1024 * 1023 / 2)
  100. abort ();
  101. for (int i = 0; i < 1024; ++i)
  102. {
  103. if (b[i].s != s.s)
  104. abort ();
  105. else
  106. b[i].s = 25;
  107. s.s += i;
  108. }
  109. if (bar ().s != 1024 * 1023)
  110. abort ();
  111. s.s = 0;
  112. for (int i = 0; i < 1024; ++i)
  113. {
  114. if (b[i].s != s.s)
  115. abort ();
  116. s.s += 2 * i;
  117. }
  118. r.s = 0;
  119. baz (a, b, r);
  120. if (r.s != 1024 * 1023 / 2)
  121. abort ();
  122. s.s = 0;
  123. for (int i = 0; i < 1024; ++i)
  124. {
  125. if (b[i].s != s.s)
  126. abort ();
  127. else
  128. b[i].s = 25;
  129. s.s += i;
  130. }
  131. if (qux ().s != 1024 * 1023)
  132. abort ();
  133. s.s = 0;
  134. for (int i = 0; i < 1024; ++i)
  135. {
  136. if (b[i].s != s.s)
  137. abort ();
  138. s.s += 2 * i;
  139. }
  140. }