// scan-15.C 2.3 KB
  // { dg-require-effective-target size32plus }
  // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
  // { dg-additional-options "-msse2" { target sse2_runtime } }
  // { dg-additional-options "-mavx" { target avx_runtime } }
  // { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } }
  // abort () is declared by hand rather than through a header; main calls it
  // whenever a computed value does not match the expected one.
  extern "C" void abort ();
  // File-scope state shared by the functions below: a is the input array,
  // b receives the per-iteration scan results, r is the accumulator handed
  // by reference to foo and baz, and q is the accumulator that bar and qux
  // alias through a local reference.
  int r, a[1024], b[1024], q;
  // User-defined reduction named foo over int: partial results are combined
  // with += and every private copy starts at 0.
  #pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
  // Exclusive scan over 1024 elements: stores into b[i] the value of r before
  // a[i] is added, and leaves the running total in r.
  //
  //   a  input array, read at indices 0..1023
  //   b  output array, written at indices 0..1023
  //   r  accumulator, updated in place through the user-defined inscan
  //      reduction foo
  //
  // The `omp for simd` construct here is orphaned (no parallel construct in
  // this function); main invokes foo from inside `#pragma omp parallel`.
  // noipa is a GCC attribute; presumably it is used so the function is
  // compiled without interprocedural optimization -- confirm against the
  // GCC attribute documentation.
  __attribute__((noipa)) void
  foo (int *a, int *b, int &r)
  {
    #pragma omp for simd reduction (inscan, foo:r)
    for (int i = 0; i < 1024; i++)
      {
        // Before the scan directive: consume the accumulator as it stands
        // without this iteration's contribution.
        b[i] = r;
        #pragma omp scan exclusive(r)
        // After the scan directive: add this iteration's contribution.
        r += a[i];
      }
  }
  20. __attribute__((noipa)) int
  21. bar (void)
  22. {
  23. int &s = q;
  24. q = 0;
  25. #pragma omp parallel
  26. #pragma omp for simd reduction (inscan, foo:s) nowait
  27. for (int i = 0; i < 1024; i++)
  28. {
  29. b[i] = s;
  30. #pragma omp scan exclusive(s)
  31. s += 2 * a[i];
  32. }
  33. return s;
  34. }
  // Same exclusive scan as foo (b[i] receives r before a[i] is added, r ends
  // up holding the running total), but written as a combined
  // `omp parallel for simd` construct carrying `if (simd: 0)`, i.e. an if
  // clause with a constant-false condition applied to the simd part.
  //
  //   a  input array, read at indices 0..1023
  //   b  output array, written at indices 0..1023
  //   r  accumulator, updated in place through the user-defined inscan
  //      reduction foo
  __attribute__((noipa)) void
  baz (int *a, int *b, int &r)
  {
    #pragma omp parallel for simd reduction (inscan, foo:r) if (simd: 0)
    for (int i = 0; i < 1024; i++)
      {
        // Before the scan directive: consume the accumulator as it stands
        // without this iteration's contribution.
        b[i] = r;
        #pragma omp scan exclusive(r)
        // After the scan directive: add this iteration's contribution.
        r += a[i];
      }
  }
  46. __attribute__((noipa)) int
  47. qux (void)
  48. {
  49. int &s = q;
  50. q = 0;
  51. #pragma omp parallel for simd reduction (inscan, foo:s)simdlen(1)
  52. for (int i = 0; i < 1024; i++)
  53. {
  54. b[i] = s;
  55. #pragma omp scan exclusive(s)
  56. s += 2 * a[i];
  57. }
  58. return s;
  59. }
  60. int
  61. main ()
  62. {
  63. int s = 0;
  64. for (int i = 0; i < 1024; ++i)
  65. {
  66. a[i] = i;
  67. b[i] = -1;
  68. asm ("" : "+g" (i));
  69. }
  70. #pragma omp parallel
  71. foo (a, b, r);
  72. if (r != 1024 * 1023 / 2)
  73. abort ();
  74. for (int i = 0; i < 1024; ++i)
  75. {
  76. if (b[i] != s)
  77. abort ();
  78. else
  79. b[i] = 25;
  80. s += i;
  81. }
  82. if (bar () != 1024 * 1023)
  83. abort ();
  84. s = 0;
  85. for (int i = 0; i < 1024; ++i)
  86. {
  87. if (b[i] != s)
  88. abort ();
  89. else
  90. b[i] = -1;
  91. s += 2 * i;
  92. }
  93. r = 0;
  94. baz (a, b, r);
  95. if (r != 1024 * 1023 / 2)
  96. abort ();
  97. s = 0;
  98. for (int i = 0; i < 1024; ++i)
  99. {
  100. if (b[i] != s)
  101. abort ();
  102. else
  103. b[i] = -25;
  104. s += i;
  105. }
  106. if (qux () != 1024 * 1023)
  107. abort ();
  108. s = 0;
  109. for (int i = 0; i < 1024; ++i)
  110. {
  111. if (b[i] != s)
  112. abort ();
  113. s += 2 * i;
  114. }
  115. }