loop.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185
  1. /* Copyright (C) 2005-2022 Free Software Foundation, Inc.
  2. Contributed by Richard Henderson <rth@redhat.com>.
  3. This file is part of the GNU Offloading and Multi Processing Library
  4. (libgomp).
  5. Libgomp is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3, or (at your option)
  8. any later version.
  9. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  20. /* This file handles the LOOP (FOR/DO) construct. */
  21. #include <limits.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include "libgomp.h"
  25. ialias (GOMP_loop_runtime_next)
  26. ialias_redirect (GOMP_taskgroup_reduction_register)
  27. /* Initialize the given work share construct from the given arguments. */
  28. static inline void
  29. gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
  30. enum gomp_schedule_type sched, long chunk_size)
  31. {
  32. ws->sched = sched;
  33. ws->chunk_size = chunk_size;
  34. /* Canonicalize loops that have zero iterations to ->next == ->end. */
  35. ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
  36. ? start : end;
  37. ws->incr = incr;
  38. ws->next = start;
  39. if (sched == GFS_DYNAMIC)
  40. {
  41. ws->chunk_size *= incr;
  42. #ifdef HAVE_SYNC_BUILTINS
  43. {
  44. /* For dynamic scheduling prepare things to make each iteration
  45. faster. */
  46. struct gomp_thread *thr = gomp_thread ();
  47. struct gomp_team *team = thr->ts.team;
  48. long nthreads = team ? team->nthreads : 1;
  49. if (__builtin_expect (incr > 0, 1))
  50. {
  51. /* Cheap overflow protection. */
  52. if (__builtin_expect ((nthreads | ws->chunk_size)
  53. >= 1UL << (sizeof (long)
  54. * __CHAR_BIT__ / 2 - 1), 0))
  55. ws->mode = 0;
  56. else
  57. ws->mode = ws->end < (LONG_MAX
  58. - (nthreads + 1) * ws->chunk_size);
  59. }
  60. /* Cheap overflow protection. */
  61. else if (__builtin_expect ((nthreads | -ws->chunk_size)
  62. >= 1UL << (sizeof (long)
  63. * __CHAR_BIT__ / 2 - 1), 0))
  64. ws->mode = 0;
  65. else
  66. ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
  67. }
  68. #endif
  69. }
  70. }
  71. /* The *_start routines are called when first encountering a loop construct
  72. that is not bound directly to a parallel construct. The first thread
  73. that arrives will create the work-share construct; subsequent threads
  74. will see the construct exists and allocate work from it.
  75. START, END, INCR are the bounds of the loop; due to the restrictions of
  76. OpenMP, these values must be the same in every thread. This is not
  77. verified (nor is it entirely verifiable, since START is not necessarily
  78. retained intact in the work-share data structure). CHUNK_SIZE is the
  79. scheduling parameter; again this must be identical in all threads.
  80. Returns true if there's any work for this thread to perform. If so,
  81. *ISTART and *IEND are filled with the bounds of the iteration block
  82. allocated to this thread. Returns false if all work was assigned to
  83. other threads prior to this thread's arrival. */
  84. static bool
  85. gomp_loop_static_start (long start, long end, long incr, long chunk_size,
  86. long *istart, long *iend)
  87. {
  88. struct gomp_thread *thr = gomp_thread ();
  89. thr->ts.static_trip = 0;
  90. if (gomp_work_share_start (0))
  91. {
  92. gomp_loop_init (thr->ts.work_share, start, end, incr,
  93. GFS_STATIC, chunk_size);
  94. gomp_work_share_init_done ();
  95. }
  96. return !gomp_iter_static_next (istart, iend);
  97. }
  98. /* The current dynamic implementation is always monotonic. The
  99. entrypoints without nonmonotonic in them have to be always monotonic,
  100. but the nonmonotonic ones could be changed to use work-stealing for
  101. improved scalability. */
  102. static bool
  103. gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
  104. long *istart, long *iend)
  105. {
  106. struct gomp_thread *thr = gomp_thread ();
  107. bool ret;
  108. if (gomp_work_share_start (0))
  109. {
  110. gomp_loop_init (thr->ts.work_share, start, end, incr,
  111. GFS_DYNAMIC, chunk_size);
  112. gomp_work_share_init_done ();
  113. }
  114. #ifdef HAVE_SYNC_BUILTINS
  115. ret = gomp_iter_dynamic_next (istart, iend);
  116. #else
  117. gomp_mutex_lock (&thr->ts.work_share->lock);
  118. ret = gomp_iter_dynamic_next_locked (istart, iend);
  119. gomp_mutex_unlock (&thr->ts.work_share->lock);
  120. #endif
  121. return ret;
  122. }
  123. /* Similarly as for dynamic, though the question is how can the chunk sizes
  124. be decreased without a central locking or atomics. */
  125. static bool
  126. gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
  127. long *istart, long *iend)
  128. {
  129. struct gomp_thread *thr = gomp_thread ();
  130. bool ret;
  131. if (gomp_work_share_start (0))
  132. {
  133. gomp_loop_init (thr->ts.work_share, start, end, incr,
  134. GFS_GUIDED, chunk_size);
  135. gomp_work_share_init_done ();
  136. }
  137. #ifdef HAVE_SYNC_BUILTINS
  138. ret = gomp_iter_guided_next (istart, iend);
  139. #else
  140. gomp_mutex_lock (&thr->ts.work_share->lock);
  141. ret = gomp_iter_guided_next_locked (istart, iend);
  142. gomp_mutex_unlock (&thr->ts.work_share->lock);
  143. #endif
  144. return ret;
  145. }
  146. bool
  147. GOMP_loop_runtime_start (long start, long end, long incr,
  148. long *istart, long *iend)
  149. {
  150. struct gomp_task_icv *icv = gomp_icv (false);
  151. switch (icv->run_sched_var & ~GFS_MONOTONIC)
  152. {
  153. case GFS_STATIC:
  154. return gomp_loop_static_start (start, end, incr,
  155. icv->run_sched_chunk_size,
  156. istart, iend);
  157. case GFS_DYNAMIC:
  158. return gomp_loop_dynamic_start (start, end, incr,
  159. icv->run_sched_chunk_size,
  160. istart, iend);
  161. case GFS_GUIDED:
  162. return gomp_loop_guided_start (start, end, incr,
  163. icv->run_sched_chunk_size,
  164. istart, iend);
  165. case GFS_AUTO:
  166. /* For now map to schedule(static), later on we could play with feedback
  167. driven choice. */
  168. return gomp_loop_static_start (start, end, incr, 0, istart, iend);
  169. default:
  170. abort ();
  171. }
  172. }
  173. static long
  174. gomp_adjust_sched (long sched, long *chunk_size)
  175. {
  176. sched &= ~GFS_MONOTONIC;
  177. switch (sched)
  178. {
  179. case GFS_STATIC:
  180. case GFS_DYNAMIC:
  181. case GFS_GUIDED:
  182. return sched;
  183. /* GFS_RUNTIME is used for runtime schedule without monotonic
  184. or nonmonotonic modifiers on the clause.
  185. GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
  186. modifier. */
  187. case GFS_RUNTIME:
  188. /* GFS_AUTO is used for runtime schedule with nonmonotonic
  189. modifier. */
  190. case GFS_AUTO:
  191. {
  192. struct gomp_task_icv *icv = gomp_icv (false);
  193. sched = icv->run_sched_var & ~GFS_MONOTONIC;
  194. switch (sched)
  195. {
  196. case GFS_STATIC:
  197. case GFS_DYNAMIC:
  198. case GFS_GUIDED:
  199. *chunk_size = icv->run_sched_chunk_size;
  200. break;
  201. case GFS_AUTO:
  202. sched = GFS_STATIC;
  203. *chunk_size = 0;
  204. break;
  205. default:
  206. abort ();
  207. }
  208. return sched;
  209. }
  210. default:
  211. abort ();
  212. }
  213. }
  214. bool
  215. GOMP_loop_start (long start, long end, long incr, long sched,
  216. long chunk_size, long *istart, long *iend,
  217. uintptr_t *reductions, void **mem)
  218. {
  219. struct gomp_thread *thr = gomp_thread ();
  220. thr->ts.static_trip = 0;
  221. if (reductions)
  222. gomp_workshare_taskgroup_start ();
  223. if (gomp_work_share_start (0))
  224. {
  225. sched = gomp_adjust_sched (sched, &chunk_size);
  226. gomp_loop_init (thr->ts.work_share, start, end, incr,
  227. sched, chunk_size);
  228. if (reductions)
  229. {
  230. GOMP_taskgroup_reduction_register (reductions);
  231. thr->task->taskgroup->workshare = true;
  232. thr->ts.work_share->task_reductions = reductions;
  233. }
  234. if (mem)
  235. {
  236. uintptr_t size = (uintptr_t) *mem;
  237. #define INLINE_ORDERED_TEAM_IDS_OFF \
  238. ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
  239. + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
  240. if (size > (sizeof (struct gomp_work_share)
  241. - INLINE_ORDERED_TEAM_IDS_OFF))
  242. *mem
  243. = (void *) (thr->ts.work_share->ordered_team_ids
  244. = gomp_malloc_cleared (size));
  245. else
  246. *mem = memset (((char *) thr->ts.work_share)
  247. + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
  248. }
  249. gomp_work_share_init_done ();
  250. }
  251. else
  252. {
  253. if (reductions)
  254. {
  255. uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
  256. gomp_workshare_task_reduction_register (reductions,
  257. first_reductions);
  258. }
  259. if (mem)
  260. {
  261. if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
  262. & (__alignof__ (long long) - 1)) == 0)
  263. *mem = (void *) thr->ts.work_share->ordered_team_ids;
  264. else
  265. {
  266. uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
  267. p += __alignof__ (long long) - 1;
  268. p &= ~(__alignof__ (long long) - 1);
  269. *mem = (void *) p;
  270. }
  271. }
  272. }
  273. if (!istart)
  274. return true;
  275. return ialias_call (GOMP_loop_runtime_next) (istart, iend);
  276. }
  277. /* The *_ordered_*_start routines are similar. The only difference is that
  278. this work-share construct is initialized to expect an ORDERED section. */
  279. static bool
  280. gomp_loop_ordered_static_start (long start, long end, long incr,
  281. long chunk_size, long *istart, long *iend)
  282. {
  283. struct gomp_thread *thr = gomp_thread ();
  284. thr->ts.static_trip = 0;
  285. if (gomp_work_share_start (1))
  286. {
  287. gomp_loop_init (thr->ts.work_share, start, end, incr,
  288. GFS_STATIC, chunk_size);
  289. gomp_ordered_static_init ();
  290. gomp_work_share_init_done ();
  291. }
  292. return !gomp_iter_static_next (istart, iend);
  293. }
  294. static bool
  295. gomp_loop_ordered_dynamic_start (long start, long end, long incr,
  296. long chunk_size, long *istart, long *iend)
  297. {
  298. struct gomp_thread *thr = gomp_thread ();
  299. bool ret;
  300. if (gomp_work_share_start (1))
  301. {
  302. gomp_loop_init (thr->ts.work_share, start, end, incr,
  303. GFS_DYNAMIC, chunk_size);
  304. gomp_mutex_lock (&thr->ts.work_share->lock);
  305. gomp_work_share_init_done ();
  306. }
  307. else
  308. gomp_mutex_lock (&thr->ts.work_share->lock);
  309. ret = gomp_iter_dynamic_next_locked (istart, iend);
  310. if (ret)
  311. gomp_ordered_first ();
  312. gomp_mutex_unlock (&thr->ts.work_share->lock);
  313. return ret;
  314. }
  315. static bool
  316. gomp_loop_ordered_guided_start (long start, long end, long incr,
  317. long chunk_size, long *istart, long *iend)
  318. {
  319. struct gomp_thread *thr = gomp_thread ();
  320. bool ret;
  321. if (gomp_work_share_start (1))
  322. {
  323. gomp_loop_init (thr->ts.work_share, start, end, incr,
  324. GFS_GUIDED, chunk_size);
  325. gomp_mutex_lock (&thr->ts.work_share->lock);
  326. gomp_work_share_init_done ();
  327. }
  328. else
  329. gomp_mutex_lock (&thr->ts.work_share->lock);
  330. ret = gomp_iter_guided_next_locked (istart, iend);
  331. if (ret)
  332. gomp_ordered_first ();
  333. gomp_mutex_unlock (&thr->ts.work_share->lock);
  334. return ret;
  335. }
  336. bool
  337. GOMP_loop_ordered_runtime_start (long start, long end, long incr,
  338. long *istart, long *iend)
  339. {
  340. struct gomp_task_icv *icv = gomp_icv (false);
  341. switch (icv->run_sched_var & ~GFS_MONOTONIC)
  342. {
  343. case GFS_STATIC:
  344. return gomp_loop_ordered_static_start (start, end, incr,
  345. icv->run_sched_chunk_size,
  346. istart, iend);
  347. case GFS_DYNAMIC:
  348. return gomp_loop_ordered_dynamic_start (start, end, incr,
  349. icv->run_sched_chunk_size,
  350. istart, iend);
  351. case GFS_GUIDED:
  352. return gomp_loop_ordered_guided_start (start, end, incr,
  353. icv->run_sched_chunk_size,
  354. istart, iend);
  355. case GFS_AUTO:
  356. /* For now map to schedule(static), later on we could play with feedback
  357. driven choice. */
  358. return gomp_loop_ordered_static_start (start, end, incr,
  359. 0, istart, iend);
  360. default:
  361. abort ();
  362. }
  363. }
  364. bool
  365. GOMP_loop_ordered_start (long start, long end, long incr, long sched,
  366. long chunk_size, long *istart, long *iend,
  367. uintptr_t *reductions, void **mem)
  368. {
  369. struct gomp_thread *thr = gomp_thread ();
  370. size_t ordered = 1;
  371. bool ret;
  372. thr->ts.static_trip = 0;
  373. if (reductions)
  374. gomp_workshare_taskgroup_start ();
  375. if (mem)
  376. ordered += (uintptr_t) *mem;
  377. if (gomp_work_share_start (ordered))
  378. {
  379. sched = gomp_adjust_sched (sched, &chunk_size);
  380. gomp_loop_init (thr->ts.work_share, start, end, incr,
  381. sched, chunk_size);
  382. if (reductions)
  383. {
  384. GOMP_taskgroup_reduction_register (reductions);
  385. thr->task->taskgroup->workshare = true;
  386. thr->ts.work_share->task_reductions = reductions;
  387. }
  388. if (sched == GFS_STATIC)
  389. gomp_ordered_static_init ();
  390. else
  391. gomp_mutex_lock (&thr->ts.work_share->lock);
  392. gomp_work_share_init_done ();
  393. }
  394. else
  395. {
  396. if (reductions)
  397. {
  398. uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
  399. gomp_workshare_task_reduction_register (reductions,
  400. first_reductions);
  401. }
  402. sched = thr->ts.work_share->sched;
  403. if (sched != GFS_STATIC)
  404. gomp_mutex_lock (&thr->ts.work_share->lock);
  405. }
  406. if (mem)
  407. {
  408. uintptr_t p
  409. = (uintptr_t) (thr->ts.work_share->ordered_team_ids
  410. + (thr->ts.team ? thr->ts.team->nthreads : 1));
  411. p += __alignof__ (long long) - 1;
  412. p &= ~(__alignof__ (long long) - 1);
  413. *mem = (void *) p;
  414. }
  415. switch (sched)
  416. {
  417. case GFS_STATIC:
  418. case GFS_AUTO:
  419. return !gomp_iter_static_next (istart, iend);
  420. case GFS_DYNAMIC:
  421. ret = gomp_iter_dynamic_next_locked (istart, iend);
  422. break;
  423. case GFS_GUIDED:
  424. ret = gomp_iter_guided_next_locked (istart, iend);
  425. break;
  426. default:
  427. abort ();
  428. }
  429. if (ret)
  430. gomp_ordered_first ();
  431. gomp_mutex_unlock (&thr->ts.work_share->lock);
  432. return ret;
  433. }
  434. /* The *_doacross_*_start routines are similar. The only difference is that
  435. this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
  436. section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
  437. and other COUNTS array elements tell the library number of iterations
  438. in the ordered inner loops. */
  439. static bool
  440. gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
  441. long chunk_size, long *istart, long *iend)
  442. {
  443. struct gomp_thread *thr = gomp_thread ();
  444. thr->ts.static_trip = 0;
  445. if (gomp_work_share_start (0))
  446. {
  447. gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
  448. GFS_STATIC, chunk_size);
  449. gomp_doacross_init (ncounts, counts, chunk_size, 0);
  450. gomp_work_share_init_done ();
  451. }
  452. return !gomp_iter_static_next (istart, iend);
  453. }
  454. static bool
  455. gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
  456. long chunk_size, long *istart, long *iend)
  457. {
  458. struct gomp_thread *thr = gomp_thread ();
  459. bool ret;
  460. if (gomp_work_share_start (0))
  461. {
  462. gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
  463. GFS_DYNAMIC, chunk_size);
  464. gomp_doacross_init (ncounts, counts, chunk_size, 0);
  465. gomp_work_share_init_done ();
  466. }
  467. #ifdef HAVE_SYNC_BUILTINS
  468. ret = gomp_iter_dynamic_next (istart, iend);
  469. #else
  470. gomp_mutex_lock (&thr->ts.work_share->lock);
  471. ret = gomp_iter_dynamic_next_locked (istart, iend);
  472. gomp_mutex_unlock (&thr->ts.work_share->lock);
  473. #endif
  474. return ret;
  475. }
  476. static bool
  477. gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
  478. long chunk_size, long *istart, long *iend)
  479. {
  480. struct gomp_thread *thr = gomp_thread ();
  481. bool ret;
  482. if (gomp_work_share_start (0))
  483. {
  484. gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
  485. GFS_GUIDED, chunk_size);
  486. gomp_doacross_init (ncounts, counts, chunk_size, 0);
  487. gomp_work_share_init_done ();
  488. }
  489. #ifdef HAVE_SYNC_BUILTINS
  490. ret = gomp_iter_guided_next (istart, iend);
  491. #else
  492. gomp_mutex_lock (&thr->ts.work_share->lock);
  493. ret = gomp_iter_guided_next_locked (istart, iend);
  494. gomp_mutex_unlock (&thr->ts.work_share->lock);
  495. #endif
  496. return ret;
  497. }
  498. bool
  499. GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
  500. long *istart, long *iend)
  501. {
  502. struct gomp_task_icv *icv = gomp_icv (false);
  503. switch (icv->run_sched_var & ~GFS_MONOTONIC)
  504. {
  505. case GFS_STATIC:
  506. return gomp_loop_doacross_static_start (ncounts, counts,
  507. icv->run_sched_chunk_size,
  508. istart, iend);
  509. case GFS_DYNAMIC:
  510. return gomp_loop_doacross_dynamic_start (ncounts, counts,
  511. icv->run_sched_chunk_size,
  512. istart, iend);
  513. case GFS_GUIDED:
  514. return gomp_loop_doacross_guided_start (ncounts, counts,
  515. icv->run_sched_chunk_size,
  516. istart, iend);
  517. case GFS_AUTO:
  518. /* For now map to schedule(static), later on we could play with feedback
  519. driven choice. */
  520. return gomp_loop_doacross_static_start (ncounts, counts,
  521. 0, istart, iend);
  522. default:
  523. abort ();
  524. }
  525. }
  526. bool
  527. GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
  528. long chunk_size, long *istart, long *iend,
  529. uintptr_t *reductions, void **mem)
  530. {
  531. struct gomp_thread *thr = gomp_thread ();
  532. thr->ts.static_trip = 0;
  533. if (reductions)
  534. gomp_workshare_taskgroup_start ();
  535. if (gomp_work_share_start (0))
  536. {
  537. size_t extra = 0;
  538. if (mem)
  539. extra = (uintptr_t) *mem;
  540. sched = gomp_adjust_sched (sched, &chunk_size);
  541. gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
  542. sched, chunk_size);
  543. gomp_doacross_init (ncounts, counts, chunk_size, extra);
  544. if (reductions)
  545. {
  546. GOMP_taskgroup_reduction_register (reductions);
  547. thr->task->taskgroup->workshare = true;
  548. thr->ts.work_share->task_reductions = reductions;
  549. }
  550. gomp_work_share_init_done ();
  551. }
  552. else
  553. {
  554. if (reductions)
  555. {
  556. uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
  557. gomp_workshare_task_reduction_register (reductions,
  558. first_reductions);
  559. }
  560. sched = thr->ts.work_share->sched;
  561. }
  562. if (mem)
  563. *mem = thr->ts.work_share->doacross->extra;
  564. return ialias_call (GOMP_loop_runtime_next) (istart, iend);
  565. }
  566. /* The *_next routines are called when the thread completes processing of
  567. the iteration block currently assigned to it. If the work-share
  568. construct is bound directly to a parallel construct, then the iteration
  569. bounds may have been set up before the parallel. In which case, this
  570. may be the first iteration for the thread.
  571. Returns true if there is work remaining to be performed; *ISTART and
  572. *IEND are filled with a new iteration block. Returns false if all work
  573. has been assigned. */
  574. static bool
  575. gomp_loop_static_next (long *istart, long *iend)
  576. {
  577. return !gomp_iter_static_next (istart, iend);
  578. }
  579. static bool
  580. gomp_loop_dynamic_next (long *istart, long *iend)
  581. {
  582. bool ret;
  583. #ifdef HAVE_SYNC_BUILTINS
  584. ret = gomp_iter_dynamic_next (istart, iend);
  585. #else
  586. struct gomp_thread *thr = gomp_thread ();
  587. gomp_mutex_lock (&thr->ts.work_share->lock);
  588. ret = gomp_iter_dynamic_next_locked (istart, iend);
  589. gomp_mutex_unlock (&thr->ts.work_share->lock);
  590. #endif
  591. return ret;
  592. }
  593. static bool
  594. gomp_loop_guided_next (long *istart, long *iend)
  595. {
  596. bool ret;
  597. #ifdef HAVE_SYNC_BUILTINS
  598. ret = gomp_iter_guided_next (istart, iend);
  599. #else
  600. struct gomp_thread *thr = gomp_thread ();
  601. gomp_mutex_lock (&thr->ts.work_share->lock);
  602. ret = gomp_iter_guided_next_locked (istart, iend);
  603. gomp_mutex_unlock (&thr->ts.work_share->lock);
  604. #endif
  605. return ret;
  606. }
  607. bool
  608. GOMP_loop_runtime_next (long *istart, long *iend)
  609. {
  610. struct gomp_thread *thr = gomp_thread ();
  611. switch (thr->ts.work_share->sched)
  612. {
  613. case GFS_STATIC:
  614. case GFS_AUTO:
  615. return gomp_loop_static_next (istart, iend);
  616. case GFS_DYNAMIC:
  617. return gomp_loop_dynamic_next (istart, iend);
  618. case GFS_GUIDED:
  619. return gomp_loop_guided_next (istart, iend);
  620. default:
  621. abort ();
  622. }
  623. }
  624. /* The *_ordered_*_next routines are called when the thread completes
  625. processing of the iteration block currently assigned to it.
  626. Returns true if there is work remaining to be performed; *ISTART and
  627. *IEND are filled with a new iteration block. Returns false if all work
  628. has been assigned. */
  629. static bool
  630. gomp_loop_ordered_static_next (long *istart, long *iend)
  631. {
  632. struct gomp_thread *thr = gomp_thread ();
  633. int test;
  634. gomp_ordered_sync ();
  635. gomp_mutex_lock (&thr->ts.work_share->lock);
  636. test = gomp_iter_static_next (istart, iend);
  637. if (test >= 0)
  638. gomp_ordered_static_next ();
  639. gomp_mutex_unlock (&thr->ts.work_share->lock);
  640. return test == 0;
  641. }
  642. static bool
  643. gomp_loop_ordered_dynamic_next (long *istart, long *iend)
  644. {
  645. struct gomp_thread *thr = gomp_thread ();
  646. bool ret;
  647. gomp_ordered_sync ();
  648. gomp_mutex_lock (&thr->ts.work_share->lock);
  649. ret = gomp_iter_dynamic_next_locked (istart, iend);
  650. if (ret)
  651. gomp_ordered_next ();
  652. else
  653. gomp_ordered_last ();
  654. gomp_mutex_unlock (&thr->ts.work_share->lock);
  655. return ret;
  656. }
  657. static bool
  658. gomp_loop_ordered_guided_next (long *istart, long *iend)
  659. {
  660. struct gomp_thread *thr = gomp_thread ();
  661. bool ret;
  662. gomp_ordered_sync ();
  663. gomp_mutex_lock (&thr->ts.work_share->lock);
  664. ret = gomp_iter_guided_next_locked (istart, iend);
  665. if (ret)
  666. gomp_ordered_next ();
  667. else
  668. gomp_ordered_last ();
  669. gomp_mutex_unlock (&thr->ts.work_share->lock);
  670. return ret;
  671. }
  672. bool
  673. GOMP_loop_ordered_runtime_next (long *istart, long *iend)
  674. {
  675. struct gomp_thread *thr = gomp_thread ();
  676. switch (thr->ts.work_share->sched)
  677. {
  678. case GFS_STATIC:
  679. case GFS_AUTO:
  680. return gomp_loop_ordered_static_next (istart, iend);
  681. case GFS_DYNAMIC:
  682. return gomp_loop_ordered_dynamic_next (istart, iend);
  683. case GFS_GUIDED:
  684. return gomp_loop_ordered_guided_next (istart, iend);
  685. default:
  686. abort ();
  687. }
  688. }
  689. /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
  690. to avoid one synchronization once we get into the loop. */
  691. static void
  692. gomp_parallel_loop_start (void (*fn) (void *), void *data,
  693. unsigned num_threads, long start, long end,
  694. long incr, enum gomp_schedule_type sched,
  695. long chunk_size, unsigned int flags)
  696. {
  697. struct gomp_team *team;
  698. num_threads = gomp_resolve_num_threads (num_threads, 0);
  699. team = gomp_new_team (num_threads);
  700. gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  701. gomp_team_start (fn, data, num_threads, flags, team, NULL);
  702. }
  703. void
  704. GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
  705. unsigned num_threads, long start, long end,
  706. long incr, long chunk_size)
  707. {
  708. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  709. GFS_STATIC, chunk_size, 0);
  710. }
  711. void
  712. GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
  713. unsigned num_threads, long start, long end,
  714. long incr, long chunk_size)
  715. {
  716. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  717. GFS_DYNAMIC, chunk_size, 0);
  718. }
  719. void
  720. GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
  721. unsigned num_threads, long start, long end,
  722. long incr, long chunk_size)
  723. {
  724. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  725. GFS_GUIDED, chunk_size, 0);
  726. }
  727. void
  728. GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
  729. unsigned num_threads, long start, long end,
  730. long incr)
  731. {
  732. struct gomp_task_icv *icv = gomp_icv (false);
  733. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  734. icv->run_sched_var & ~GFS_MONOTONIC,
  735. icv->run_sched_chunk_size, 0);
  736. }
  737. ialias_redirect (GOMP_parallel_end)
  738. void
  739. GOMP_parallel_loop_static (void (*fn) (void *), void *data,
  740. unsigned num_threads, long start, long end,
  741. long incr, long chunk_size, unsigned flags)
  742. {
  743. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  744. GFS_STATIC, chunk_size, flags);
  745. fn (data);
  746. GOMP_parallel_end ();
  747. }
  748. void
  749. GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
  750. unsigned num_threads, long start, long end,
  751. long incr, long chunk_size, unsigned flags)
  752. {
  753. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  754. GFS_DYNAMIC, chunk_size, flags);
  755. fn (data);
  756. GOMP_parallel_end ();
  757. }
  758. void
  759. GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
  760. unsigned num_threads, long start, long end,
  761. long incr, long chunk_size, unsigned flags)
  762. {
  763. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  764. GFS_GUIDED, chunk_size, flags);
  765. fn (data);
  766. GOMP_parallel_end ();
  767. }
  768. void
  769. GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
  770. unsigned num_threads, long start, long end,
  771. long incr, unsigned flags)
  772. {
  773. struct gomp_task_icv *icv = gomp_icv (false);
  774. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  775. icv->run_sched_var & ~GFS_MONOTONIC,
  776. icv->run_sched_chunk_size, flags);
  777. fn (data);
  778. GOMP_parallel_end ();
  779. }
  780. #ifdef HAVE_ATTRIBUTE_ALIAS
  781. extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
  782. __attribute__((alias ("GOMP_parallel_loop_dynamic")));
  783. extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
  784. __attribute__((alias ("GOMP_parallel_loop_guided")));
  785. extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
  786. __attribute__((alias ("GOMP_parallel_loop_runtime")));
  787. extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
  788. __attribute__((alias ("GOMP_parallel_loop_runtime")));
  789. #else
  790. void
  791. GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
  792. unsigned num_threads, long start,
  793. long end, long incr, long chunk_size,
  794. unsigned flags)
  795. {
  796. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  797. GFS_DYNAMIC, chunk_size, flags);
  798. fn (data);
  799. GOMP_parallel_end ();
  800. }
  801. void
  802. GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
  803. unsigned num_threads, long start,
  804. long end, long incr, long chunk_size,
  805. unsigned flags)
  806. {
  807. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  808. GFS_GUIDED, chunk_size, flags);
  809. fn (data);
  810. GOMP_parallel_end ();
  811. }
  812. void
  813. GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
  814. unsigned num_threads, long start,
  815. long end, long incr, unsigned flags)
  816. {
  817. struct gomp_task_icv *icv = gomp_icv (false);
  818. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  819. icv->run_sched_var & ~GFS_MONOTONIC,
  820. icv->run_sched_chunk_size, flags);
  821. fn (data);
  822. GOMP_parallel_end ();
  823. }
  824. void
  825. GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
  826. unsigned num_threads, long start,
  827. long end, long incr,
  828. unsigned flags)
  829. {
  830. struct gomp_task_icv *icv = gomp_icv (false);
  831. gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
  832. icv->run_sched_var & ~GFS_MONOTONIC,
  833. icv->run_sched_chunk_size, flags);
  834. fn (data);
  835. GOMP_parallel_end ();
  836. }
  837. #endif
  /* The GOMP_loop_end* routines are called after the thread is told that
     all loop iterations are complete.  The first two versions synchronize
     all threads; the nowait version does not.  */
  /* Finish the current loop by delegating to gomp_work_share_end.  Takes
     no arguments and returns nothing.  */
  void
  GOMP_loop_end (void)
  {
    gomp_work_share_end ();
  }
  /* Finish the current loop by delegating to gomp_work_share_end_cancel
     and hand its result back to the caller unchanged.  */
  bool
  GOMP_loop_end_cancel (void)
  {
    bool result = gomp_work_share_end_cancel ();

    return result;
  }
  /* Finish the current loop by delegating to gomp_work_share_end_nowait.
     Takes no arguments and returns nothing.  */
  void
  GOMP_loop_end_nowait (void)
  {
    gomp_work_share_end_nowait ();
  }
  /* We use static functions above so that we're sure that the "runtime"
     function can defer to the proper routine without interposition.  We
     export the static functions with a strong alias when possible, or with
     a wrapper function otherwise.  */
  860. #ifdef HAVE_ATTRIBUTE_ALIAS
  861. extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
  862. __attribute__((alias ("gomp_loop_static_start")));
  863. extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
  864. __attribute__((alias ("gomp_loop_dynamic_start")));
  865. extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
  866. __attribute__((alias ("gomp_loop_guided_start")));
  867. extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
  868. __attribute__((alias ("gomp_loop_dynamic_start")));
  869. extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
  870. __attribute__((alias ("gomp_loop_guided_start")));
  871. extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
  872. __attribute__((alias ("GOMP_loop_runtime_start")));
  873. extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
  874. __attribute__((alias ("GOMP_loop_runtime_start")));
  875. extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
  876. __attribute__((alias ("gomp_loop_ordered_static_start")));
  877. extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
  878. __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
  879. extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
  880. __attribute__((alias ("gomp_loop_ordered_guided_start")));
  881. extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
  882. __attribute__((alias ("gomp_loop_doacross_static_start")));
  883. extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
  884. __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
  885. extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
  886. __attribute__((alias ("gomp_loop_doacross_guided_start")));
  887. extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
  888. __attribute__((alias ("gomp_loop_static_next")));
  889. extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
  890. __attribute__((alias ("gomp_loop_dynamic_next")));
  891. extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
  892. __attribute__((alias ("gomp_loop_guided_next")));
  893. extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
  894. __attribute__((alias ("gomp_loop_dynamic_next")));
  895. extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
  896. __attribute__((alias ("gomp_loop_guided_next")));
  897. extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
  898. __attribute__((alias ("GOMP_loop_runtime_next")));
  899. extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
  900. __attribute__((alias ("GOMP_loop_runtime_next")));
  901. extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
  902. __attribute__((alias ("gomp_loop_ordered_static_next")));
  903. extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
  904. __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
  905. extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
  906. __attribute__((alias ("gomp_loop_ordered_guided_next")));
  907. #else
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     every argument through to gomp_loop_static_start and returns its
     result unchanged.  */
  bool
  GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                          long *istart, long *iend)
  {
    bool result = gomp_loop_static_start (start, end, incr, chunk_size,
                                          istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     every argument through to gomp_loop_dynamic_start and returns its
     result unchanged.  */
  bool
  GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                           long *istart, long *iend)
  {
    bool result = gomp_loop_dynamic_start (start, end, incr, chunk_size,
                                           istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     every argument through to gomp_loop_guided_start and returns its
     result unchanged.  */
  bool
  GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                          long *istart, long *iend)
  {
    bool result = gomp_loop_guided_start (start, end, incr, chunk_size,
                                          istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  The
     nonmonotonic dynamic start is served by gomp_loop_dynamic_start; all
     arguments and the result pass through unchanged.  */
  bool
  GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                        long chunk_size, long *istart,
                                        long *iend)
  {
    bool result = gomp_loop_dynamic_start (start, end, incr, chunk_size,
                                           istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  The
     nonmonotonic guided start is served by gomp_loop_guided_start; all
     arguments and the result pass through unchanged.  */
  bool
  GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                       long chunk_size, long *istart,
                                       long *iend)
  {
    bool result = gomp_loop_guided_start (start, end, incr, chunk_size,
                                          istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Defers to
     GOMP_loop_runtime_start with the same arguments and returns its
     result unchanged.  */
  bool
  GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                        long *istart, long *iend)
  {
    bool result = GOMP_loop_runtime_start (start, end, incr, istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Defers to
     GOMP_loop_runtime_start with the same arguments and returns its
     result unchanged.  */
  bool
  GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end,
                                              long incr, long *istart,
                                              long *iend)
  {
    bool result = GOMP_loop_runtime_start (start, end, incr, istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     every argument through to gomp_loop_ordered_static_start and returns
     its result unchanged.  */
  bool
  GOMP_loop_ordered_static_start (long start, long end, long incr,
                                  long chunk_size, long *istart, long *iend)
  {
    bool result = gomp_loop_ordered_static_start (start, end, incr,
                                                  chunk_size, istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     every argument through to gomp_loop_ordered_dynamic_start and returns
     its result unchanged.  */
  bool
  GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                   long chunk_size, long *istart, long *iend)
  {
    bool result = gomp_loop_ordered_dynamic_start (start, end, incr,
                                                   chunk_size, istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     every argument through to gomp_loop_ordered_guided_start and returns
     its result unchanged.  */
  bool
  GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                  long chunk_size, long *istart, long *iend)
  {
    bool result = gomp_loop_ordered_guided_start (start, end, incr,
                                                  chunk_size, istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     NCOUNTS, COUNTS, CHUNK_SIZE, ISTART and IEND through to
     gomp_loop_doacross_static_start and returns its result unchanged.  */
  bool
  GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                   long chunk_size, long *istart, long *iend)
  {
    bool result = gomp_loop_doacross_static_start (ncounts, counts,
                                                   chunk_size, istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     NCOUNTS, COUNTS, CHUNK_SIZE, ISTART and IEND through to
     gomp_loop_doacross_dynamic_start and returns its result unchanged.  */
  bool
  GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                    long chunk_size, long *istart,
                                    long *iend)
  {
    bool result = gomp_loop_doacross_dynamic_start (ncounts, counts,
                                                    chunk_size, istart,
                                                    iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     NCOUNTS, COUNTS, CHUNK_SIZE, ISTART and IEND through to
     gomp_loop_doacross_guided_start and returns its result unchanged.  */
  bool
  GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                   long chunk_size, long *istart, long *iend)
  {
    bool result = gomp_loop_doacross_guided_start (ncounts, counts,
                                                   chunk_size, istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     ISTART and IEND to gomp_loop_static_next and returns its result
     unchanged.  */
  bool
  GOMP_loop_static_next (long *istart, long *iend)
  {
    bool result = gomp_loop_static_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     ISTART and IEND to gomp_loop_dynamic_next and returns its result
     unchanged.  */
  bool
  GOMP_loop_dynamic_next (long *istart, long *iend)
  {
    bool result = gomp_loop_dynamic_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     ISTART and IEND to gomp_loop_guided_next and returns its result
     unchanged.  */
  bool
  GOMP_loop_guided_next (long *istart, long *iend)
  {
    bool result = gomp_loop_guided_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  The
     nonmonotonic dynamic next is served by gomp_loop_dynamic_next; ISTART,
     IEND and the result pass through unchanged.  */
  bool
  GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
  {
    bool result = gomp_loop_dynamic_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  The
     nonmonotonic guided next is served by gomp_loop_guided_next; ISTART,
     IEND and the result pass through unchanged.  */
  bool
  GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
  {
    bool result = gomp_loop_guided_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Defers to
     GOMP_loop_runtime_next with the same ISTART and IEND and returns its
     result unchanged.  */
  bool
  GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
  {
    bool result = GOMP_loop_runtime_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Defers to
     GOMP_loop_runtime_next with the same ISTART and IEND and returns its
     result unchanged.  */
  bool
  GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
  {
    bool result = GOMP_loop_runtime_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     ISTART and IEND to gomp_loop_ordered_static_next and returns its
     result unchanged.  */
  bool
  GOMP_loop_ordered_static_next (long *istart, long *iend)
  {
    bool result = gomp_loop_ordered_static_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     ISTART and IEND to gomp_loop_ordered_dynamic_next and returns its
     result unchanged.  */
  bool
  GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
  {
    bool result = gomp_loop_ordered_dynamic_next (istart, iend);

    return result;
  }
  /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined.  Passes
     ISTART and IEND to gomp_loop_ordered_guided_next and returns its
     result unchanged.  */
  bool
  GOMP_loop_ordered_guided_next (long *istart, long *iend)
  {
    bool result = gomp_loop_ordered_guided_next (istart, iend);

    return result;
  }
  1043. #endif