/* Copyright (C) 2015-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
  /* This file handles the taskloop construct.  It is included twice, once
     for the long variant and once for the unsigned long long variant.  */
  22. /* Called when encountering an explicit task directive. If IF_CLAUSE is
  23. false, then we must not delay in executing the task. If UNTIED is true,
  24. then the task may be executed by any member of the team. */
  25. void
  26. GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  27. long arg_size, long arg_align, unsigned flags,
  28. unsigned long num_tasks, int priority,
  29. TYPE start, TYPE end, TYPE step)
  30. {
  31. struct gomp_thread *thr = gomp_thread ();
  32. struct gomp_team *team = thr->ts.team;
  33. #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  34. /* If pthread_mutex_* is used for omp_*lock*, then each task must be
  35. tied to one thread all the time. This means UNTIED tasks must be
  36. tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
  37. might be running on different thread than FN. */
  38. if (cpyfn)
  39. flags &= ~GOMP_TASK_FLAG_IF;
  40. flags &= ~GOMP_TASK_FLAG_UNTIED;
  41. #endif
  42. /* If parallel or taskgroup has been cancelled, don't start new tasks. */
  43. if (team && gomp_team_barrier_cancelled (&team->barrier))
  44. {
  45. early_return:
  46. if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
  47. == GOMP_TASK_FLAG_REDUCTION)
  48. {
  49. struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
  50. uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
  51. /* Tell callers GOMP_taskgroup_reduction_register has not been
  52. called. */
  53. ptr[2] = 0;
  54. }
  55. return;
  56. }
  57. #ifdef TYPE_is_long
  58. TYPE s = step;
  59. if (step > 0)
  60. {
  61. if (start >= end)
  62. goto early_return;
  63. s--;
  64. }
  65. else
  66. {
  67. if (start <= end)
  68. goto early_return;
  69. s++;
  70. }
  71. UTYPE n = (end - start + s) / step;
  72. #else
  73. UTYPE n;
  74. if (flags & GOMP_TASK_FLAG_UP)
  75. {
  76. if (start >= end)
  77. goto early_return;
  78. n = (end - start + step - 1) / step;
  79. }
  80. else
  81. {
  82. if (start <= end)
  83. goto early_return;
  84. n = (start - end - step - 1) / -step;
  85. }
  86. #endif
  87. TYPE task_step = step;
  88. TYPE nfirst_task_step = step;
  89. unsigned long nfirst = n;
  90. if (flags & GOMP_TASK_FLAG_GRAINSIZE)
  91. {
  92. unsigned long grainsize = num_tasks;
  93. #ifdef TYPE_is_long
  94. num_tasks = n / grainsize;
  95. #else
  96. UTYPE ndiv = n / grainsize;
  97. num_tasks = ndiv;
  98. if (num_tasks != ndiv)
  99. num_tasks = ~0UL;
  100. #endif
  101. if ((flags & GOMP_TASK_FLAG_STRICT)
  102. && num_tasks != ~0ULL)
  103. {
  104. UTYPE mod = n % grainsize;
  105. task_step = (TYPE) grainsize * step;
  106. if (mod)
  107. {
  108. num_tasks++;
  109. nfirst_task_step = (TYPE) mod * step;
  110. if (num_tasks == 1)
  111. task_step = nfirst_task_step;
  112. else
  113. nfirst = num_tasks - 2;
  114. }
  115. }
  116. else if (num_tasks <= 1)
  117. {
  118. num_tasks = 1;
  119. task_step = end - start;
  120. }
  121. else if (num_tasks >= grainsize
  122. #ifndef TYPE_is_long
  123. && num_tasks != ~0UL
  124. #endif
  125. )
  126. {
  127. UTYPE mul = num_tasks * grainsize;
  128. task_step = (TYPE) grainsize * step;
  129. if (mul != n)
  130. {
  131. nfirst_task_step = task_step;
  132. task_step += step;
  133. nfirst = n - mul - 1;
  134. }
  135. }
  136. else
  137. {
  138. UTYPE div = n / num_tasks;
  139. UTYPE mod = n % num_tasks;
  140. task_step = (TYPE) div * step;
  141. if (mod)
  142. {
  143. nfirst_task_step = task_step;
  144. task_step += step;
  145. nfirst = mod - 1;
  146. }
  147. }
  148. }
  149. else
  150. {
  151. if (num_tasks == 0)
  152. num_tasks = team ? team->nthreads : 1;
  153. if (num_tasks >= n)
  154. num_tasks = n;
  155. else
  156. {
  157. UTYPE div = n / num_tasks;
  158. UTYPE mod = n % num_tasks;
  159. task_step = (TYPE) div * step;
  160. if (mod)
  161. {
  162. nfirst_task_step = task_step;
  163. task_step += step;
  164. nfirst = mod - 1;
  165. }
  166. }
  167. }
  168. if (flags & GOMP_TASK_FLAG_NOGROUP)
  169. {
  170. if (__builtin_expect (gomp_cancel_var, 0)
  171. && thr->task
  172. && thr->task->taskgroup)
  173. {
  174. if (thr->task->taskgroup->cancelled)
  175. return;
  176. if (thr->task->taskgroup->workshare
  177. && thr->task->taskgroup->prev
  178. && thr->task->taskgroup->prev->cancelled)
  179. return;
  180. }
  181. }
  182. else
  183. {
  184. ialias_call (GOMP_taskgroup_start) ();
  185. if (flags & GOMP_TASK_FLAG_REDUCTION)
  186. {
  187. struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
  188. uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
  189. ialias_call (GOMP_taskgroup_reduction_register) (ptr);
  190. }
  191. }
  192. if (priority > gomp_max_task_priority_var)
  193. priority = gomp_max_task_priority_var;
  194. if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
  195. || (thr->task && thr->task->final_task)
  196. || team->task_count + num_tasks > 64 * team->nthreads)
  197. {
  198. unsigned long i;
  199. if (__builtin_expect (cpyfn != NULL, 0))
  200. {
  201. struct gomp_task task[num_tasks];
  202. struct gomp_task *parent = thr->task;
  203. arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
  204. char buf[num_tasks * arg_size + arg_align - 1];
  205. char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
  206. & ~(uintptr_t) (arg_align - 1));
  207. char *orig_arg = arg;
  208. for (i = 0; i < num_tasks; i++)
  209. {
  210. gomp_init_task (&task[i], parent, gomp_icv (false));
  211. task[i].priority = priority;
  212. task[i].kind = GOMP_TASK_UNDEFERRED;
  213. task[i].final_task = (thr->task && thr->task->final_task)
  214. || (flags & GOMP_TASK_FLAG_FINAL);
  215. if (thr->task)
  216. {
  217. task[i].in_tied_task = thr->task->in_tied_task;
  218. task[i].taskgroup = thr->task->taskgroup;
  219. }
  220. thr->task = &task[i];
  221. cpyfn (arg, data);
  222. arg += arg_size;
  223. }
  224. arg = orig_arg;
  225. for (i = 0; i < num_tasks; i++)
  226. {
  227. thr->task = &task[i];
  228. ((TYPE *)arg)[0] = start;
  229. start += task_step;
  230. ((TYPE *)arg)[1] = start;
  231. if (i == nfirst)
  232. task_step = nfirst_task_step;
  233. fn (arg);
  234. arg += arg_size;
  235. if (!priority_queue_empty_p (&task[i].children_queue,
  236. MEMMODEL_RELAXED))
  237. {
  238. gomp_mutex_lock (&team->task_lock);
  239. gomp_clear_parent (&task[i].children_queue);
  240. gomp_mutex_unlock (&team->task_lock);
  241. }
  242. gomp_end_task ();
  243. }
  244. }
  245. else
  246. for (i = 0; i < num_tasks; i++)
  247. {
  248. struct gomp_task task;
  249. gomp_init_task (&task, thr->task, gomp_icv (false));
  250. task.priority = priority;
  251. task.kind = GOMP_TASK_UNDEFERRED;
  252. task.final_task = (thr->task && thr->task->final_task)
  253. || (flags & GOMP_TASK_FLAG_FINAL);
  254. if (thr->task)
  255. {
  256. task.in_tied_task = thr->task->in_tied_task;
  257. task.taskgroup = thr->task->taskgroup;
  258. }
  259. thr->task = &task;
  260. ((TYPE *)data)[0] = start;
  261. start += task_step;
  262. ((TYPE *)data)[1] = start;
  263. if (i == nfirst)
  264. task_step = nfirst_task_step;
  265. fn (data);
  266. if (!priority_queue_empty_p (&task.children_queue,
  267. MEMMODEL_RELAXED))
  268. {
  269. gomp_mutex_lock (&team->task_lock);
  270. gomp_clear_parent (&task.children_queue);
  271. gomp_mutex_unlock (&team->task_lock);
  272. }
  273. gomp_end_task ();
  274. }
  275. }
  276. else
  277. {
  278. struct gomp_task *tasks[num_tasks];
  279. struct gomp_task *parent = thr->task;
  280. struct gomp_taskgroup *taskgroup = parent->taskgroup;
  281. char *arg;
  282. int do_wake;
  283. unsigned long i;
  284. for (i = 0; i < num_tasks; i++)
  285. {
  286. struct gomp_task *task
  287. = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
  288. tasks[i] = task;
  289. arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
  290. & ~(uintptr_t) (arg_align - 1));
  291. gomp_init_task (task, parent, gomp_icv (false));
  292. task->priority = priority;
  293. task->kind = GOMP_TASK_UNDEFERRED;
  294. task->in_tied_task = parent->in_tied_task;
  295. task->taskgroup = taskgroup;
  296. thr->task = task;
  297. if (cpyfn)
  298. {
  299. cpyfn (arg, data);
  300. task->copy_ctors_done = true;
  301. }
  302. else
  303. memcpy (arg, data, arg_size);
  304. ((TYPE *)arg)[0] = start;
  305. start += task_step;
  306. ((TYPE *)arg)[1] = start;
  307. if (i == nfirst)
  308. task_step = nfirst_task_step;
  309. thr->task = parent;
  310. task->kind = GOMP_TASK_WAITING;
  311. task->fn = fn;
  312. task->fn_data = arg;
  313. task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
  314. }
  315. gomp_mutex_lock (&team->task_lock);
  316. /* If parallel or taskgroup has been cancelled, don't start new
  317. tasks. */
  318. if (__builtin_expect (gomp_cancel_var, 0)
  319. && cpyfn == NULL)
  320. {
  321. if (gomp_team_barrier_cancelled (&team->barrier))
  322. {
  323. do_cancel:
  324. gomp_mutex_unlock (&team->task_lock);
  325. for (i = 0; i < num_tasks; i++)
  326. {
  327. gomp_finish_task (tasks[i]);
  328. free (tasks[i]);
  329. }
  330. if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
  331. ialias_call (GOMP_taskgroup_end) ();
  332. return;
  333. }
  334. if (taskgroup)
  335. {
  336. if (taskgroup->cancelled)
  337. goto do_cancel;
  338. if (taskgroup->workshare
  339. && taskgroup->prev
  340. && taskgroup->prev->cancelled)
  341. goto do_cancel;
  342. }
  343. }
  344. if (taskgroup)
  345. taskgroup->num_children += num_tasks;
  346. for (i = 0; i < num_tasks; i++)
  347. {
  348. struct gomp_task *task = tasks[i];
  349. priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
  350. task, priority,
  351. PRIORITY_INSERT_BEGIN,
  352. /*last_parent_depends_on=*/false,
  353. task->parent_depends_on);
  354. if (taskgroup)
  355. priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
  356. task, priority, PRIORITY_INSERT_BEGIN,
  357. /*last_parent_depends_on=*/false,
  358. task->parent_depends_on);
  359. priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
  360. PRIORITY_INSERT_END,
  361. /*last_parent_depends_on=*/false,
  362. task->parent_depends_on);
  363. ++team->task_count;
  364. ++team->task_queued_count;
  365. }
  366. gomp_team_barrier_set_task_pending (&team->barrier);
  367. if (team->task_running_count + !parent->in_tied_task
  368. < team->nthreads)
  369. {
  370. do_wake = team->nthreads - team->task_running_count
  371. - !parent->in_tied_task;
  372. if ((unsigned long) do_wake > num_tasks)
  373. do_wake = num_tasks;
  374. }
  375. else
  376. do_wake = 0;
  377. gomp_mutex_unlock (&team->task_lock);
  378. if (do_wake)
  379. gomp_team_barrier_wake (&team->barrier, do_wake);
  380. }
  381. if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
  382. ialias_call (GOMP_taskgroup_end) ();
  383. }