target.c

  1. /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
  2. Contributed by Jakub Jelinek <jakub@redhat.com>.
  3. This file is part of the GNU Offloading and Multi Processing Library
  4. (libgomp).
  5. Libgomp is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3, or (at your option)
  8. any later version.
  9. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  20. /* This file contains the support of offloading. */
  21. #include "libgomp.h"
  22. #include "oacc-plugin.h"
  23. #include "oacc-int.h"
  24. #include "gomp-constants.h"
  25. #include <limits.h>
  26. #include <stdbool.h>
  27. #include <stdlib.h>
  28. #ifdef HAVE_INTTYPES_H
  29. # include <inttypes.h> /* For PRIu64. */
  30. #endif
  31. #include <string.h>
  32. #include <assert.h>
  33. #include <errno.h>
  34. #ifdef PLUGIN_SUPPORT
  35. #include <dlfcn.h>
  36. #include "plugin-suffix.h"
  37. #endif
  38. typedef uintptr_t *hash_entry_type;
  39. static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
  40. static inline void htab_free (void *ptr) { free (ptr); }
  41. #include "hashtab.h"
  42. static inline hashval_t
  43. htab_hash (hash_entry_type element)
  44. {
  45. return hash_pointer ((void *) element);
  46. }
  47. static inline bool
  48. htab_eq (hash_entry_type x, hash_entry_type y)
  49. {
  50. return x == y;
  51. }
  52. #define FIELD_TGT_EMPTY (~(size_t) 0)
  53. static void gomp_target_init (void);
  54. /* The whole initialization code for offloading plugins is only run once. */
  55. static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
  56. /* Mutex for offload image registration. */
  57. static gomp_mutex_t register_lock;
  58. /* This structure describes an offload image.
  59. It contains the type of the target device, a pointer to the host table
  60. descriptor, and a pointer to the target data. */
  61. struct offload_image_descr {
  62. unsigned version;
  63. enum offload_target_type type;
  64. const void *host_table;
  65. const void *target_data;
  66. };
  67. /* Array of descriptors of offload images. */
  68. static struct offload_image_descr *offload_images;
  69. /* Total number of offload images. */
  70. static int num_offload_images;
  71. /* Array of descriptors for all available devices. */
  72. static struct gomp_device_descr *devices;
  73. /* Total number of available devices. */
  74. static int num_devices;
  75. /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
  76. static int num_devices_openmp;
  77. /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
  78. static void *
  79. gomp_realloc_unlock (void *old, size_t size)
  80. {
  81. void *ret = realloc (old, size);
  82. if (ret == NULL)
  83. {
  84. gomp_mutex_unlock (&register_lock);
  85. gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
  86. }
  87. return ret;
  88. }
  89. attribute_hidden void
  90. gomp_init_targets_once (void)
  91. {
  92. (void) pthread_once (&gomp_is_initialized, gomp_target_init);
  93. }
  94. attribute_hidden int
  95. gomp_get_num_devices (void)
  96. {
  97. gomp_init_targets_once ();
  98. return num_devices_openmp;
  99. }
  100. static struct gomp_device_descr *
  101. resolve_device (int device_id)
  102. {
  103. if (device_id == GOMP_DEVICE_ICV)
  104. {
  105. struct gomp_task_icv *icv = gomp_icv (false);
  106. device_id = icv->default_device_var;
  107. }
  108. if (device_id < 0 || device_id >= gomp_get_num_devices ())
  109. {
  110. if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
  111. && device_id != GOMP_DEVICE_HOST_FALLBACK
  112. && device_id != num_devices_openmp)
  113. gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
  114. "but device not found");
  115. return NULL;
  116. }
  117. gomp_mutex_lock (&devices[device_id].lock);
  118. if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
  119. gomp_init_device (&devices[device_id]);
  120. else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
  121. {
  122. gomp_mutex_unlock (&devices[device_id].lock);
  123. if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
  124. gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
  125. "but device is finalized");
  126. return NULL;
  127. }
  128. gomp_mutex_unlock (&devices[device_id].lock);
  129. return &devices[device_id];
  130. }
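  /* Usage note (illustrative, hypothetical user code): a '#pragma omp target'
     without a 'device' clause typically reaches resolve_device with
     GOMP_DEVICE_ICV, which is replaced above by the default-device-var ICV.
     An out-of-range id makes the function return NULL so callers fall back to
     host execution, unless OMP_TARGET_OFFLOAD=mandatory is in effect, in which
     case gomp_fatal is raised.  The device is lazily initialized on first use
     and rejected if it has already been finalized. */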
  131. static inline splay_tree_key
  132. gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
  133. {
  134. if (key->host_start != key->host_end)
  135. return splay_tree_lookup (mem_map, key);
  136. key->host_end++;
  137. splay_tree_key n = splay_tree_lookup (mem_map, key);
  138. key->host_end--;
  139. if (n)
  140. return n;
  141. key->host_start--;
  142. n = splay_tree_lookup (mem_map, key);
  143. key->host_start++;
  144. if (n)
  145. return n;
  146. return splay_tree_lookup (mem_map, key);
  147. }
  148. static inline splay_tree_key
  149. gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
  150. {
  151. if (key->host_start != key->host_end)
  152. return splay_tree_lookup (mem_map, key);
  153. key->host_end++;
  154. splay_tree_key n = splay_tree_lookup (mem_map, key);
  155. key->host_end--;
  156. return n;
  157. }
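  /* Both lookup helpers above deal with zero-sized keys (host_start ==
     host_end), which as-is would never intersect anything in the splay tree.
     gomp_map_lookup therefore also probes with the key widened by one byte
     past its end and then by one byte before its start, so a pointer just
     past the end of a mapped object (or just before its start) still resolves
     to that mapping; gomp_map_0len_lookup only widens past the end. */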
  158. static inline void
  159. gomp_device_copy (struct gomp_device_descr *devicep,
  160. bool (*copy_func) (int, void *, const void *, size_t),
  161. const char *dst, void *dstaddr,
  162. const char *src, const void *srcaddr,
  163. size_t size)
  164. {
  165. if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
  166. {
  167. gomp_mutex_unlock (&devicep->lock);
  168. gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
  169. src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
  170. }
  171. }
  172. static inline void
  173. goacc_device_copy_async (struct gomp_device_descr *devicep,
  174. bool (*copy_func) (int, void *, const void *, size_t,
  175. struct goacc_asyncqueue *),
  176. const char *dst, void *dstaddr,
  177. const char *src, const void *srcaddr,
  178. const void *srcaddr_orig,
  179. size_t size, struct goacc_asyncqueue *aq)
  180. {
  181. if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
  182. {
  183. gomp_mutex_unlock (&devicep->lock);
  184. if (srcaddr_orig && srcaddr_orig != srcaddr)
  185. gomp_fatal ("Copying of %s object [%p..%p)"
  186. " via buffer %s object [%p..%p)"
  187. " to %s object [%p..%p) failed",
  188. src, srcaddr_orig, srcaddr_orig + size,
  189. src, srcaddr, srcaddr + size,
  190. dst, dstaddr, dstaddr + size);
  191. else
  192. gomp_fatal ("Copying of %s object [%p..%p)"
  193. " to %s object [%p..%p) failed",
  194. src, srcaddr, srcaddr + size,
  195. dst, dstaddr, dstaddr + size);
  196. }
  197. }
  198. /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
  199. host to device memory transfers. */
  200. struct gomp_coalesce_chunk
  201. {
  202. /* The starting and ending point of a coalesced chunk of memory. */
  203. size_t start, end;
  204. };
  205. struct gomp_coalesce_buf
  206. {
  207. /* Buffer into which gomp_copy_host2dev will memcpy data and from which
  208. it will be copied to the device. */
  209. void *buf;
  210. struct target_mem_desc *tgt;
  211. /* Array with offsets, chunks[i].start is the starting offset and
  212. chunks[i].end ending offset relative to tgt->tgt_start device address
  213. of chunks which are to be copied to buf and later copied to device. */
  214. struct gomp_coalesce_chunk *chunks;
  215. /* Number of chunks in chunks array, or -1 if coalesce buffering should not
  216. be performed. */
  217. long chunk_cnt;
  218. /* During construction of chunks array, how many memory regions are within
  219. the last chunk. If there is just one memory region for a chunk, we copy
  220. it directly to device rather than going through buf. */
  221. long use_cnt;
  222. };
  223. /* Maximum size of memory region considered for coalescing. Larger copies
  224. are performed directly. */
  225. #define MAX_COALESCE_BUF_SIZE (32 * 1024)
  226. /* Maximum size of a gap in between regions to consider them being copied
  227. within the same chunk. All the device offsets considered are within
  228. newly allocated device memory, so it isn't fatal if we copy some padding
  229. in between from host to device. The gaps come either from alignment
  230. padding or from memory regions which are not supposed to be copied from
  231. host to device (e.g. map(alloc:), map(from:) etc.). */
  232. #define MAX_COALESCE_BUF_GAP (4 * 1024)
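  /* For illustration (hypothetical user code, not part of libgomp): in

       #pragma omp target map(to: a[0:256]) map(alloc: scratch[0:256]) \
                          map(to: b[0:256])

     only the 'to' regions are staged through the coalesce buffer, because
     gomp_to_device_kind_p rejects map(alloc:)/map(from:) kinds.  The
     'scratch' block then shows up as a gap between chunks; as long as the
     gap stays below MAX_COALESCE_BUF_GAP, the two 'to' copies are still
     merged into a single host-to-device transfer. */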
  233. /* Add region with device tgt_start relative offset and length to CBUF.
  234. This must not be used for asynchronous copies, because the host data might
  235. not be computed yet (by an earlier asynchronous compute region, for
  236. example).
  237. TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
  238. is it more performant to use libgomp CBUF buffering or individual device
  239. asynchronous copying?) */
  240. static inline void
  241. gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
  242. {
  243. if (len > MAX_COALESCE_BUF_SIZE || len == 0)
  244. return;
  245. if (cbuf->chunk_cnt)
  246. {
  247. if (cbuf->chunk_cnt < 0)
  248. return;
  249. if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
  250. {
  251. cbuf->chunk_cnt = -1;
  252. return;
  253. }
  254. if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
  255. {
  256. cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
  257. cbuf->use_cnt++;
  258. return;
  259. }
  260. /* If the last chunk is only used by one mapping, discard it,
  261. as it will be one host to device copy anyway and
  262. memcpying it around will only waste cycles. */
  263. if (cbuf->use_cnt == 1)
  264. cbuf->chunk_cnt--;
  265. }
  266. cbuf->chunks[cbuf->chunk_cnt].start = start;
  267. cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  268. cbuf->chunk_cnt++;
  269. cbuf->use_cnt = 1;
  270. }
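  /* A short worked example of the merging logic above (offsets are relative
     to tgt->tgt_start):

       gomp_coalesce_buf_add (&cbuf, 0, 64);     /* chunk [0, 64),  use_cnt 1 */
       gomp_coalesce_buf_add (&cbuf, 128, 64);   /* chunk [0, 192), use_cnt 2 */
       gomp_coalesce_buf_add (&cbuf, 8192, 64);  /* new chunk [8192, 8256)    */

     The second region merges because its distance from the previous chunk end
     (64 bytes) is below MAX_COALESCE_BUF_GAP; the third one starts a fresh
     chunk.  Before a fresh chunk is recorded, the previous chunk is dropped
     again if only a single region ever landed in it (use_cnt == 1), since a
     lone region is cheaper to copy directly. */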
  271. /* Return true for mapping kinds which need to copy data from the
  272. host to device for regions that weren't previously mapped. */
  273. static inline bool
  274. gomp_to_device_kind_p (int kind)
  275. {
  276. switch (kind)
  277. {
  278. case GOMP_MAP_ALLOC:
  279. case GOMP_MAP_FROM:
  280. case GOMP_MAP_FORCE_ALLOC:
  281. case GOMP_MAP_FORCE_FROM:
  282. case GOMP_MAP_ALWAYS_FROM:
  283. return false;
  284. default:
  285. return true;
  286. }
  287. }
  288. /* Copy host memory to an offload device. In asynchronous mode (if AQ is
  289. non-NULL), when the source data is on the stack or may otherwise be deallocated
  290. before the asynchronous copy takes place, EPHEMERAL must be passed as
  291. TRUE. */
  292. attribute_hidden void
  293. gomp_copy_host2dev (struct gomp_device_descr *devicep,
  294. struct goacc_asyncqueue *aq,
  295. void *d, const void *h, size_t sz,
  296. bool ephemeral, struct gomp_coalesce_buf *cbuf)
  297. {
  298. if (__builtin_expect (aq != NULL, 0))
  299. {
  300. /* See 'gomp_coalesce_buf_add'. */
  301. assert (!cbuf);
  302. void *h_buf = (void *) h;
  303. if (ephemeral)
  304. {
  305. /* We're queueing up an asynchronous copy from data that may
  306. disappear before the transfer takes place (i.e. because it is a
  307. stack local in a function that is no longer executing). Make a
  308. copy of the data into a temporary buffer in those cases. */
  309. h_buf = gomp_malloc (sz);
  310. memcpy (h_buf, h, sz);
  311. }
  312. goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
  313. "dev", d, "host", h_buf, h, sz, aq);
  314. if (ephemeral)
  315. /* Free temporary buffer once the transfer has completed. */
  316. devicep->openacc.async.queue_callback_func (aq, free, h_buf);
  317. return;
  318. }
  319. if (cbuf)
  320. {
  321. uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
  322. if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
  323. {
  324. long first = 0;
  325. long last = cbuf->chunk_cnt - 1;
  326. while (first <= last)
  327. {
  328. long middle = (first + last) >> 1;
  329. if (cbuf->chunks[middle].end <= doff)
  330. first = middle + 1;
  331. else if (cbuf->chunks[middle].start <= doff)
  332. {
  333. if (doff + sz > cbuf->chunks[middle].end)
  334. {
  335. gomp_mutex_unlock (&devicep->lock);
  336. gomp_fatal ("internal libgomp cbuf error");
  337. }
  338. memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
  339. h, sz);
  340. return;
  341. }
  342. else
  343. last = middle - 1;
  344. }
  345. }
  346. }
  347. gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
  348. }
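  /* When a coalesce buffer is supplied, the binary search above locates the
     chunk covering the destination offset and the bytes are merely staged
     into cbuf->buf (at the destination's offset relative to the first chunk);
     nothing is sent to the device at that point.  Destinations outside every
     chunk, including copies larger than MAX_COALESCE_BUF_SIZE that were never
     recorded as chunks, fall through to the direct gomp_device_copy call. */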
  349. attribute_hidden void
  350. gomp_copy_dev2host (struct gomp_device_descr *devicep,
  351. struct goacc_asyncqueue *aq,
  352. void *h, const void *d, size_t sz)
  353. {
  354. if (__builtin_expect (aq != NULL, 0))
  355. goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
  356. "host", h, "dev", d, NULL, sz, aq);
  357. else
  358. gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
  359. }
  360. static void
  361. gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
  362. {
  363. if (!devicep->free_func (devicep->target_id, devptr))
  364. {
  365. gomp_mutex_unlock (&devicep->lock);
  366. gomp_fatal ("error in freeing device memory block at %p", devptr);
  367. }
  368. }
  369. /* Increment reference count of a splay_tree_key region K by 1.
  370. If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
  371. increment the value if refcount is not yet contained in the set (used for
  372. OpenMP 5.0, which specifies that a region's refcount is adjusted at most
  373. once for each construct). */
  374. static inline void
  375. gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
  376. {
  377. if (k == NULL || k->refcount == REFCOUNT_INFINITY)
  378. return;
  379. uintptr_t *refcount_ptr = &k->refcount;
  380. if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
  381. refcount_ptr = &k->structelem_refcount;
  382. else if (REFCOUNT_STRUCTELEM_P (k->refcount))
  383. refcount_ptr = k->structelem_refcount_ptr;
  384. if (refcount_set)
  385. {
  386. if (htab_find (*refcount_set, refcount_ptr))
  387. return;
  388. uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
  389. *slot = refcount_ptr;
  390. }
  391. *refcount_ptr += 1;
  392. return;
  393. }
  394. /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
  395. is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
  396. track already seen refcounts, and only adjust the value if refcount is not
  397. yet contained in the set (like gomp_increment_refcount).
  398. Return out-values: set *DO_COPY to true if we set the refcount to zero, or
  399. it is already zero and we know we decremented it earlier. This signals that
  400. associated maps should be copied back to host.
  401. *DO_REMOVE is set to true when this is the first handling of this refcount
  402. and we are setting it to zero. This signals a removal of this key from the
  403. splay-tree map.
  404. Copy and removal are separated due to cases like handling of structure
  405. elements, e.g. each map of a structure element representing a possible copy
  406. out of a structure field has to be handled individually, but we only signal
  407. removal for one (the first encountered) sibling map. */
  408. static inline void
  409. gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
  410. bool *do_copy, bool *do_remove)
  411. {
  412. if (k == NULL || k->refcount == REFCOUNT_INFINITY)
  413. {
  414. *do_copy = *do_remove = false;
  415. return;
  416. }
  417. uintptr_t *refcount_ptr = &k->refcount;
  418. if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
  419. refcount_ptr = &k->structelem_refcount;
  420. else if (REFCOUNT_STRUCTELEM_P (k->refcount))
  421. refcount_ptr = k->structelem_refcount_ptr;
  422. bool new_encountered_refcount;
  423. bool set_to_zero = false;
  424. bool is_zero = false;
  425. uintptr_t orig_refcount = *refcount_ptr;
  426. if (refcount_set)
  427. {
  428. if (htab_find (*refcount_set, refcount_ptr))
  429. {
  430. new_encountered_refcount = false;
  431. goto end;
  432. }
  433. uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
  434. *slot = refcount_ptr;
  435. new_encountered_refcount = true;
  436. }
  437. else
  438. /* If no refcount_set being used, assume all keys are being decremented
  439. for the first time. */
  440. new_encountered_refcount = true;
  441. if (delete_p)
  442. *refcount_ptr = 0;
  443. else if (*refcount_ptr > 0)
  444. *refcount_ptr -= 1;
  445. end:
  446. if (*refcount_ptr == 0)
  447. {
  448. if (orig_refcount > 0)
  449. set_to_zero = true;
  450. is_zero = true;
  451. }
  452. *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  453. *do_remove = (new_encountered_refcount && set_to_zero);
  454. }
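  /* Worked example of the out-values (illustrative): three maps of fields of
     one structure share a single structelem refcount via
     structelem_refcount_ptr.  With a REFCOUNT_SET present, only the first
     sibling actually decrements the counter; if that brings it to zero, the
     first sibling reports *DO_COPY = *DO_REMOVE = true, while the remaining
     siblings report *DO_COPY = true (so their fields are still copied back)
     but *DO_REMOVE = false, matching the comment above. */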
  455. /* Handle the case where gomp_map_lookup, splay_tree_lookup or
  456. gomp_map_0len_lookup found oldn for newn.
  457. Helper function of gomp_map_vars. */
  458. static inline void
  459. gomp_map_vars_existing (struct gomp_device_descr *devicep,
  460. struct goacc_asyncqueue *aq, splay_tree_key oldn,
  461. splay_tree_key newn, struct target_var_desc *tgt_var,
  462. unsigned char kind, bool always_to_flag, bool implicit,
  463. struct gomp_coalesce_buf *cbuf,
  464. htab_t *refcount_set)
  465. {
  466. assert (kind != GOMP_MAP_ATTACH
  467. && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
  468. tgt_var->key = oldn;
  469. tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  470. tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  471. tgt_var->is_attach = false;
  472. tgt_var->offset = newn->host_start - oldn->host_start;
  473. /* For implicit maps, old contained in new is valid. */
  474. bool implicit_subset = (implicit
  475. && newn->host_start <= oldn->host_start
  476. && oldn->host_end <= newn->host_end);
  477. if (implicit_subset)
  478. tgt_var->length = oldn->host_end - oldn->host_start;
  479. else
  480. tgt_var->length = newn->host_end - newn->host_start;
  481. if ((kind & GOMP_MAP_FLAG_FORCE)
  482. /* For implicit maps, old contained in new is valid. */
  483. || !(implicit_subset
  484. /* Otherwise, new contained inside old is considered valid. */
  485. || (oldn->host_start <= newn->host_start
  486. && newn->host_end <= oldn->host_end)))
  487. {
  488. gomp_mutex_unlock (&devicep->lock);
  489. gomp_fatal ("Trying to map into device [%p..%p) object when "
  490. "[%p..%p) is already mapped",
  491. (void *) newn->host_start, (void *) newn->host_end,
  492. (void *) oldn->host_start, (void *) oldn->host_end);
  493. }
  494. if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
  495. {
  496. /* Implicit + always should not happen. If this does occur, the
  497. address/length adjustment below is a TODO. */
  498. assert (!implicit_subset);
  499. if (oldn->aux && oldn->aux->attach_count)
  500. {
  501. /* We have to be careful not to overwrite still attached pointers
  502. during the copyback to host. */
  503. uintptr_t addr = newn->host_start;
  504. while (addr < newn->host_end)
  505. {
  506. size_t i = (addr - oldn->host_start) / sizeof (void *);
  507. if (oldn->aux->attach_count[i] == 0)
  508. gomp_copy_host2dev (devicep, aq,
  509. (void *) (oldn->tgt->tgt_start
  510. + oldn->tgt_offset
  511. + addr - oldn->host_start),
  512. (void *) addr,
  513. sizeof (void *), false, cbuf);
  514. addr += sizeof (void *);
  515. }
  516. }
  517. else
  518. gomp_copy_host2dev (devicep, aq,
  519. (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
  520. + newn->host_start - oldn->host_start),
  521. (void *) newn->host_start,
  522. newn->host_end - newn->host_start, false, cbuf);
  523. }
  524. gomp_increment_refcount (oldn, refcount_set);
  525. }
  526. static int
  527. get_kind (bool short_mapkind, void *kinds, int idx)
  528. {
  529. if (!short_mapkind)
  530. return ((unsigned char *) kinds)[idx];
  531. int val = ((unsigned short *) kinds)[idx];
  532. if (GOMP_MAP_IMPLICIT_P (val))
  533. val &= ~GOMP_MAP_IMPLICIT;
  534. return val;
  535. }
  536. static bool
  537. get_implicit (bool short_mapkind, void *kinds, int idx)
  538. {
  539. if (!short_mapkind)
  540. return false;
  541. int val = ((unsigned short *) kinds)[idx];
  542. return GOMP_MAP_IMPLICIT_P (val);
  543. }
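  /* Encoding assumed by the two helpers above: with short_mapkind each entry
     in KINDS is an unsigned short whose low byte is the map kind (masked with
     typemask 0xff elsewhere) and whose high byte is the log2 of the required
     alignment (hence the 'kind >> rshift' uses later in this file), with
     GOMP_MAP_IMPLICIT or'ed in for compiler-generated implicit maps.  With
     !short_mapkind each entry is a single byte holding a 3-bit kind and a
     5-bit log2 alignment, and implicit maps cannot be represented. */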
  544. static void
  545. gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
  546. uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
  547. struct gomp_coalesce_buf *cbuf,
  548. bool allow_zero_length_array_sections)
  549. {
  550. struct gomp_device_descr *devicep = tgt->device_descr;
  551. struct splay_tree_s *mem_map = &devicep->mem_map;
  552. struct splay_tree_key_s cur_node;
  553. cur_node.host_start = host_ptr;
  554. if (cur_node.host_start == (uintptr_t) NULL)
  555. {
  556. cur_node.tgt_offset = (uintptr_t) NULL;
  557. gomp_copy_host2dev (devicep, aq,
  558. (void *) (tgt->tgt_start + target_offset),
  559. (void *) &cur_node.tgt_offset, sizeof (void *),
  560. true, cbuf);
  561. return;
  562. }
  563. /* Add bias to the pointer value. */
  564. cur_node.host_start += bias;
  565. cur_node.host_end = cur_node.host_start;
  566. splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  567. if (n == NULL)
  568. {
  569. if (allow_zero_length_array_sections)
  570. cur_node.tgt_offset = 0;
  571. else
  572. {
  573. gomp_mutex_unlock (&devicep->lock);
  574. gomp_fatal ("Pointer target of array section wasn't mapped");
  575. }
  576. }
  577. else
  578. {
  579. cur_node.host_start -= n->host_start;
  580. cur_node.tgt_offset
  581. = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  582. /* At this point tgt_offset is target address of the
  583. array section. Now subtract bias to get what we want
  584. to initialize the pointer with. */
  585. cur_node.tgt_offset -= bias;
  586. }
  587. gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
  588. (void *) &cur_node.tgt_offset, sizeof (void *),
  589. true, cbuf);
  590. }
  591. static void
  592. gomp_map_fields_existing (struct target_mem_desc *tgt,
  593. struct goacc_asyncqueue *aq, splay_tree_key n,
  594. size_t first, size_t i, void **hostaddrs,
  595. size_t *sizes, void *kinds,
  596. struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
  597. {
  598. struct gomp_device_descr *devicep = tgt->device_descr;
  599. struct splay_tree_s *mem_map = &devicep->mem_map;
  600. struct splay_tree_key_s cur_node;
  601. int kind;
  602. bool implicit;
  603. const bool short_mapkind = true;
  604. const int typemask = short_mapkind ? 0xff : 0x7;
  605. cur_node.host_start = (uintptr_t) hostaddrs[i];
  606. cur_node.host_end = cur_node.host_start + sizes[i];
  607. splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  608. kind = get_kind (short_mapkind, kinds, i);
  609. implicit = get_implicit (short_mapkind, kinds, i);
  610. if (n2
  611. && n2->tgt == n->tgt
  612. && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
  613. {
  614. gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
  615. kind & typemask, false, implicit, cbuf,
  616. refcount_set);
  617. return;
  618. }
  619. if (sizes[i] == 0)
  620. {
  621. if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
  622. {
  623. cur_node.host_start--;
  624. n2 = splay_tree_lookup (mem_map, &cur_node);
  625. cur_node.host_start++;
  626. if (n2
  627. && n2->tgt == n->tgt
  628. && n2->host_start - n->host_start
  629. == n2->tgt_offset - n->tgt_offset)
  630. {
  631. gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
  632. kind & typemask, false, implicit, cbuf,
  633. refcount_set);
  634. return;
  635. }
  636. }
  637. cur_node.host_end++;
  638. n2 = splay_tree_lookup (mem_map, &cur_node);
  639. cur_node.host_end--;
  640. if (n2
  641. && n2->tgt == n->tgt
  642. && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
  643. {
  644. gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
  645. kind & typemask, false, implicit, cbuf,
  646. refcount_set);
  647. return;
  648. }
  649. }
  650. gomp_mutex_unlock (&devicep->lock);
  651. gomp_fatal ("Trying to map into device [%p..%p) structure element when "
  652. "other mapped elements from the same structure weren't mapped "
  653. "together with it", (void *) cur_node.host_start,
  654. (void *) cur_node.host_end);
  655. }
  656. attribute_hidden void
  657. gomp_attach_pointer (struct gomp_device_descr *devicep,
  658. struct goacc_asyncqueue *aq, splay_tree mem_map,
  659. splay_tree_key n, uintptr_t attach_to, size_t bias,
  660. struct gomp_coalesce_buf *cbufp,
  661. bool allow_zero_length_array_sections)
  662. {
  663. struct splay_tree_key_s s;
  664. size_t size, idx;
  665. if (n == NULL)
  666. {
  667. gomp_mutex_unlock (&devicep->lock);
  668. gomp_fatal ("enclosing struct not mapped for attach");
  669. }
  670. size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  671. /* We might have a pointer in a packed struct: however we cannot have more
  672. than one such pointer in each pointer-sized portion of the struct, so
  673. this is safe. */
  674. idx = (attach_to - n->host_start) / sizeof (void *);
  675. if (!n->aux)
  676. n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
  677. if (!n->aux->attach_count)
  678. n->aux->attach_count
  679. = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
  680. if (n->aux->attach_count[idx] < UINTPTR_MAX)
  681. n->aux->attach_count[idx]++;
  682. else
  683. {
  684. gomp_mutex_unlock (&devicep->lock);
  685. gomp_fatal ("attach count overflow");
  686. }
  687. if (n->aux->attach_count[idx] == 1)
  688. {
  689. uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
  690. - n->host_start;
  691. uintptr_t target = (uintptr_t) *(void **) attach_to;
  692. splay_tree_key tn;
  693. uintptr_t data;
  694. if ((void *) target == NULL)
  695. {
  696. gomp_mutex_unlock (&devicep->lock);
  697. gomp_fatal ("attempt to attach null pointer");
  698. }
  699. s.host_start = target + bias;
  700. s.host_end = s.host_start + 1;
  701. tn = splay_tree_lookup (mem_map, &s);
  702. if (!tn)
  703. {
  704. if (allow_zero_length_array_sections)
  705. /* When allowing attachment to zero-length array sections, we
  706. allow attaching to NULL pointers when the target region is not
  707. mapped. */
  708. data = 0;
  709. else
  710. {
  711. gomp_mutex_unlock (&devicep->lock);
  712. gomp_fatal ("pointer target not mapped for attach");
  713. }
  714. }
  715. else
  716. data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
  717. gomp_debug (1,
  718. "%s: attaching host %p, target %p (struct base %p) to %p\n",
  719. __FUNCTION__, (void *) attach_to, (void *) devptr,
  720. (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
  721. gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
  722. sizeof (void *), true, cbufp);
  723. }
  724. else
  725. gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
  726. (void *) attach_to, (int) n->aux->attach_count[idx]);
  727. }
  728. attribute_hidden void
  729. gomp_detach_pointer (struct gomp_device_descr *devicep,
  730. struct goacc_asyncqueue *aq, splay_tree_key n,
  731. uintptr_t detach_from, bool finalize,
  732. struct gomp_coalesce_buf *cbufp)
  733. {
  734. size_t idx;
  735. if (n == NULL)
  736. {
  737. gomp_mutex_unlock (&devicep->lock);
  738. gomp_fatal ("enclosing struct not mapped for detach");
  739. }
  740. idx = (detach_from - n->host_start) / sizeof (void *);
  741. if (!n->aux || !n->aux->attach_count)
  742. {
  743. gomp_mutex_unlock (&devicep->lock);
  744. gomp_fatal ("no attachment counters for struct");
  745. }
  746. if (finalize)
  747. n->aux->attach_count[idx] = 1;
  748. if (n->aux->attach_count[idx] == 0)
  749. {
  750. gomp_mutex_unlock (&devicep->lock);
  751. gomp_fatal ("attach count underflow");
  752. }
  753. else
  754. n->aux->attach_count[idx]--;
  755. if (n->aux->attach_count[idx] == 0)
  756. {
  757. uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
  758. - n->host_start;
  759. uintptr_t target = (uintptr_t) *(void **) detach_from;
  760. gomp_debug (1,
  761. "%s: detaching host %p, target %p (struct base %p) to %p\n",
  762. __FUNCTION__, (void *) detach_from, (void *) devptr,
  763. (void *) (n->tgt->tgt_start + n->tgt_offset),
  764. (void *) target);
  765. gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
  766. sizeof (void *), true, cbufp);
  767. }
  768. else
  769. gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
  770. (void *) detach_from, (int) n->aux->attach_count[idx]);
  771. }
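  /* The attach/detach pair above keeps one counter per pointer-sized slot of
     the enclosing mapping (n->aux->attach_count).  Only the 0 -> 1 transition
     rewrites the pointer on the device with the translated device address,
     and only the 1 -> 0 transition writes the pointer's current host value
     back into device memory; nested attaches of the same slot just bump the
     counter.  With FINALIZE set, gomp_detach_pointer forces the counter to 1
     first so the detach always takes effect. */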
  772. attribute_hidden uintptr_t
  773. gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
  774. {
  775. if (tgt->list[i].key != NULL)
  776. return tgt->list[i].key->tgt->tgt_start
  777. + tgt->list[i].key->tgt_offset
  778. + tgt->list[i].offset;
  779. switch (tgt->list[i].offset)
  780. {
  781. case OFFSET_INLINED:
  782. return (uintptr_t) hostaddrs[i];
  783. case OFFSET_POINTER:
  784. return 0;
  785. case OFFSET_STRUCT:
  786. return tgt->list[i + 1].key->tgt->tgt_start
  787. + tgt->list[i + 1].key->tgt_offset
  788. + tgt->list[i + 1].offset
  789. + (uintptr_t) hostaddrs[i]
  790. - (uintptr_t) hostaddrs[i + 1];
  791. default:
  792. return tgt->tgt_start + tgt->list[i].offset;
  793. }
  794. }
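  /* Summary of the sentinel offsets handled above: a non-NULL key yields the
     real device address (the key's target block start + tgt_offset + offset);
     OFFSET_INLINED passes the host value through unchanged (used e.g. for
     firstprivate integers and resolved use_device pointers); OFFSET_POINTER
     yields 0; OFFSET_STRUCT derives the struct base address from the
     following list entry; anything else is an offset into this tgt's own
     freshly allocated block (tgt->tgt_start + offset). */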
  795. static inline __attribute__((always_inline)) struct target_mem_desc *
  796. gomp_map_vars_internal (struct gomp_device_descr *devicep,
  797. struct goacc_asyncqueue *aq, size_t mapnum,
  798. void **hostaddrs, void **devaddrs, size_t *sizes,
  799. void *kinds, bool short_mapkind,
  800. htab_t *refcount_set,
  801. enum gomp_map_vars_kind pragma_kind)
  802. {
  803. size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  804. bool has_firstprivate = false;
  805. bool has_always_ptrset = false;
  806. bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  807. const int rshift = short_mapkind ? 8 : 3;
  808. const int typemask = short_mapkind ? 0xff : 0x7;
  809. struct splay_tree_s *mem_map = &devicep->mem_map;
  810. struct splay_tree_key_s cur_node;
  811. struct target_mem_desc *tgt
  812. = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  813. tgt->list_count = mapnum;
  814. tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  815. tgt->device_descr = devicep;
  816. tgt->prev = NULL;
  817. struct gomp_coalesce_buf cbuf, *cbufp = NULL;
  818. if (mapnum == 0)
  819. {
  820. tgt->tgt_start = 0;
  821. tgt->tgt_end = 0;
  822. return tgt;
  823. }
  824. tgt_align = sizeof (void *);
  825. tgt_size = 0;
  826. cbuf.chunks = NULL;
  827. cbuf.chunk_cnt = -1;
  828. cbuf.use_cnt = 0;
  829. cbuf.buf = NULL;
  830. if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
  831. {
  832. size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
  833. cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
  834. cbuf.chunk_cnt = 0;
  835. }
  836. if (pragma_kind == GOMP_MAP_VARS_TARGET)
  837. {
  838. size_t align = 4 * sizeof (void *);
  839. tgt_align = align;
  840. tgt_size = mapnum * sizeof (void *);
  841. cbuf.chunk_cnt = 1;
  842. cbuf.use_cnt = 1 + (mapnum > 1);
  843. cbuf.chunks[0].start = 0;
  844. cbuf.chunks[0].end = tgt_size;
  845. }
  846. gomp_mutex_lock (&devicep->lock);
  847. if (devicep->state == GOMP_DEVICE_FINALIZED)
  848. {
  849. gomp_mutex_unlock (&devicep->lock);
  850. free (tgt);
  851. return NULL;
  852. }
  853. for (i = 0; i < mapnum; i++)
  854. {
  855. int kind = get_kind (short_mapkind, kinds, i);
  856. bool implicit = get_implicit (short_mapkind, kinds, i);
  857. if (hostaddrs[i] == NULL
  858. || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
  859. {
  860. tgt->list[i].key = NULL;
  861. tgt->list[i].offset = OFFSET_INLINED;
  862. continue;
  863. }
  864. else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
  865. || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
  866. {
  867. tgt->list[i].key = NULL;
  868. if (!not_found_cnt)
  869. {
  870. /* In OpenMP < 5.0 and OpenACC the mapping has to be done
  871. on a separate construct prior to using use_device_{addr,ptr}.
  872. In OpenMP 5.0, map directives need to be ordered by the
  873. middle-end before the use_device_* clauses. If
  874. !not_found_cnt, all mappings requested (if any) are already
  875. mapped, so use_device_{addr,ptr} can be resolved right away.
  876. Otherwise, if not_found_cnt, gomp_map_lookup might fail
  877. now but would succeed after performing the mappings in the
  878. following loop. We can't defer this always to the second
  879. loop, because it is not even invoked when !not_found_cnt
  880. after the first loop. */
  881. cur_node.host_start = (uintptr_t) hostaddrs[i];
  882. cur_node.host_end = cur_node.host_start;
  883. splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  884. if (n != NULL)
  885. {
  886. cur_node.host_start -= n->host_start;
  887. hostaddrs[i]
  888. = (void *) (n->tgt->tgt_start + n->tgt_offset
  889. + cur_node.host_start);
  890. }
  891. else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
  892. {
  893. gomp_mutex_unlock (&devicep->lock);
  894. gomp_fatal ("use_device_ptr pointer wasn't mapped");
  895. }
  896. else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
  897. /* If not present, continue using the host address. */
  898. ;
  899. else
  900. __builtin_unreachable ();
  901. tgt->list[i].offset = OFFSET_INLINED;
  902. }
  903. else
  904. tgt->list[i].offset = 0;
  905. continue;
  906. }
  907. else if ((kind & typemask) == GOMP_MAP_STRUCT)
  908. {
  909. size_t first = i + 1;
  910. size_t last = i + sizes[i];
  911. cur_node.host_start = (uintptr_t) hostaddrs[i];
  912. cur_node.host_end = (uintptr_t) hostaddrs[last]
  913. + sizes[last];
  914. tgt->list[i].key = NULL;
  915. tgt->list[i].offset = OFFSET_STRUCT;
  916. splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
  917. if (n == NULL)
  918. {
  919. size_t align = (size_t) 1 << (kind >> rshift);
  920. if (tgt_align < align)
  921. tgt_align = align;
  922. tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
  923. tgt_size = (tgt_size + align - 1) & ~(align - 1);
  924. tgt_size += cur_node.host_end - cur_node.host_start;
  925. not_found_cnt += last - i;
  926. for (i = first; i <= last; i++)
  927. {
  928. tgt->list[i].key = NULL;
  929. if (!aq
  930. && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
  931. & typemask))
  932. gomp_coalesce_buf_add (&cbuf,
  933. tgt_size - cur_node.host_end
  934. + (uintptr_t) hostaddrs[i],
  935. sizes[i]);
  936. }
  937. i--;
  938. continue;
  939. }
  940. for (i = first; i <= last; i++)
  941. gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
  942. sizes, kinds, NULL, refcount_set);
  943. i--;
  944. continue;
  945. }
  946. else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
  947. {
  948. tgt->list[i].key = NULL;
  949. tgt->list[i].offset = OFFSET_POINTER;
  950. has_firstprivate = true;
  951. continue;
  952. }
  953. else if ((kind & typemask) == GOMP_MAP_ATTACH
  954. || ((kind & typemask)
  955. == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
  956. {
  957. tgt->list[i].key = NULL;
  958. has_firstprivate = true;
  959. continue;
  960. }
  961. cur_node.host_start = (uintptr_t) hostaddrs[i];
  962. if (!GOMP_MAP_POINTER_P (kind & typemask))
  963. cur_node.host_end = cur_node.host_start + sizes[i];
  964. else
  965. cur_node.host_end = cur_node.host_start + sizeof (void *);
  966. if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
  967. {
  968. tgt->list[i].key = NULL;
  969. size_t align = (size_t) 1 << (kind >> rshift);
  970. if (tgt_align < align)
  971. tgt_align = align;
  972. tgt_size = (tgt_size + align - 1) & ~(align - 1);
  973. if (!aq)
  974. gomp_coalesce_buf_add (&cbuf, tgt_size,
  975. cur_node.host_end - cur_node.host_start);
  976. tgt_size += cur_node.host_end - cur_node.host_start;
  977. has_firstprivate = true;
  978. continue;
  979. }
  980. splay_tree_key n;
  981. if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
  982. {
  983. n = gomp_map_0len_lookup (mem_map, &cur_node);
  984. if (!n)
  985. {
  986. tgt->list[i].key = NULL;
  987. tgt->list[i].offset = OFFSET_POINTER;
  988. continue;
  989. }
  990. }
  991. else
  992. n = splay_tree_lookup (mem_map, &cur_node);
  993. if (n && n->refcount != REFCOUNT_LINK)
  994. {
  995. int always_to_cnt = 0;
  996. if ((kind & typemask) == GOMP_MAP_TO_PSET)
  997. {
  998. bool has_nullptr = false;
  999. size_t j;
  1000. for (j = 0; j < n->tgt->list_count; j++)
  1001. if (n->tgt->list[j].key == n)
  1002. {
  1003. has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
  1004. break;
  1005. }
  1006. if (n->tgt->list_count == 0)
  1007. {
  1008. /* 'declare target'; assume has_nullptr; it could also be a
  1009. statically assigned pointer, but then it should point to
  1010. the equivalent variable on the host. */
  1011. assert (n->refcount == REFCOUNT_INFINITY);
  1012. has_nullptr = true;
  1013. }
  1014. else
  1015. assert (j < n->tgt->list_count);
  1016. /* Re-map the data if there is an 'always' modifier, or if a
  1017. null pointer was there and now a non-null one has been found; that
  1018. permits transparent re-mapping for Fortran array descriptors
  1019. which were previously mapped unallocated. */
  1020. for (j = i + 1; j < mapnum; j++)
  1021. {
  1022. int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
  1023. if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
  1024. && (!has_nullptr
  1025. || !GOMP_MAP_POINTER_P (ptr_kind)
  1026. || *(void **) hostaddrs[j] == NULL))
  1027. break;
  1028. else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
  1029. || ((uintptr_t) hostaddrs[j] + sizeof (void *)
  1030. > cur_node.host_end))
  1031. break;
  1032. else
  1033. {
  1034. has_always_ptrset = true;
  1035. ++always_to_cnt;
  1036. }
  1037. }
  1038. }
  1039. gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
  1040. kind & typemask, always_to_cnt > 0, implicit,
  1041. NULL, refcount_set);
  1042. i += always_to_cnt;
  1043. }
  1044. else
  1045. {
  1046. tgt->list[i].key = NULL;
  1047. if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
  1048. {
  1049. /* Not present, hence skip the entry, including its MAP_POINTER
  1050. if one exists. */
  1051. tgt->list[i].offset = OFFSET_POINTER;
  1052. if (i + 1 < mapnum
  1053. && ((typemask & get_kind (short_mapkind, kinds, i + 1))
  1054. == GOMP_MAP_POINTER))
  1055. {
  1056. ++i;
  1057. tgt->list[i].key = NULL;
  1058. tgt->list[i].offset = 0;
  1059. }
  1060. continue;
  1061. }
  1062. size_t align = (size_t) 1 << (kind >> rshift);
  1063. not_found_cnt++;
  1064. if (tgt_align < align)
  1065. tgt_align = align;
  1066. tgt_size = (tgt_size + align - 1) & ~(align - 1);
  1067. if (!aq
  1068. && gomp_to_device_kind_p (kind & typemask))
  1069. gomp_coalesce_buf_add (&cbuf, tgt_size,
  1070. cur_node.host_end - cur_node.host_start);
  1071. tgt_size += cur_node.host_end - cur_node.host_start;
  1072. if ((kind & typemask) == GOMP_MAP_TO_PSET)
  1073. {
  1074. size_t j;
  1075. int kind;
  1076. for (j = i + 1; j < mapnum; j++)
  1077. if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
  1078. kinds, j)) & typemask))
  1079. && !GOMP_MAP_ALWAYS_POINTER_P (kind))
  1080. break;
  1081. else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
  1082. || ((uintptr_t) hostaddrs[j] + sizeof (void *)
  1083. > cur_node.host_end))
  1084. break;
  1085. else
  1086. {
  1087. tgt->list[j].key = NULL;
  1088. i++;
  1089. }
  1090. }
  1091. }
  1092. }
  1093. if (devaddrs)
  1094. {
  1095. if (mapnum != 1)
  1096. {
  1097. gomp_mutex_unlock (&devicep->lock);
  1098. gomp_fatal ("unexpected aggregation");
  1099. }
  1100. tgt->to_free = devaddrs[0];
  1101. tgt->tgt_start = (uintptr_t) tgt->to_free;
  1102. tgt->tgt_end = tgt->tgt_start + sizes[0];
  1103. }
  1104. else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
  1105. {
  1106. /* Allocate tgt_align aligned tgt_size block of memory. */
  1107. /* FIXME: Perhaps change interface to allocate properly aligned
  1108. memory. */
  1109. tgt->to_free = devicep->alloc_func (devicep->target_id,
  1110. tgt_size + tgt_align - 1);
  1111. if (!tgt->to_free)
  1112. {
  1113. gomp_mutex_unlock (&devicep->lock);
  1114. gomp_fatal ("device memory allocation fail");
  1115. }
  1116. tgt->tgt_start = (uintptr_t) tgt->to_free;
  1117. tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
  1118. tgt->tgt_end = tgt->tgt_start + tgt_size;
  1119. if (cbuf.use_cnt == 1)
  1120. cbuf.chunk_cnt--;
  1121. if (cbuf.chunk_cnt > 0)
  1122. {
  1123. cbuf.buf
  1124. = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
  1125. if (cbuf.buf)
  1126. {
  1127. cbuf.tgt = tgt;
  1128. cbufp = &cbuf;
  1129. }
  1130. }
  1131. }
  1132. else
  1133. {
  1134. tgt->to_free = NULL;
  1135. tgt->tgt_start = 0;
  1136. tgt->tgt_end = 0;
  1137. }
  1138. tgt_size = 0;
  1139. if (pragma_kind == GOMP_MAP_VARS_TARGET)
  1140. tgt_size = mapnum * sizeof (void *);
  1141. tgt->array = NULL;
  1142. if (not_found_cnt || has_firstprivate || has_always_ptrset)
  1143. {
  1144. if (not_found_cnt)
  1145. tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
  1146. splay_tree_node array = tgt->array;
  1147. size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
  1148. uintptr_t field_tgt_base = 0;
  1149. splay_tree_key field_tgt_structelem_first = NULL;
  1150. for (i = 0; i < mapnum; i++)
  1151. if (has_always_ptrset
  1152. && tgt->list[i].key
  1153. && (get_kind (short_mapkind, kinds, i) & typemask)
  1154. == GOMP_MAP_TO_PSET)
  1155. {
  1156. splay_tree_key k = tgt->list[i].key;
  1157. bool has_nullptr = false;
  1158. size_t j;
  1159. for (j = 0; j < k->tgt->list_count; j++)
  1160. if (k->tgt->list[j].key == k)
  1161. {
  1162. has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
  1163. break;
  1164. }
  1165. if (k->tgt->list_count == 0)
  1166. has_nullptr = true;
  1167. else
  1168. assert (j < k->tgt->list_count);
  1169. tgt->list[i].has_null_ptr_assoc = false;
  1170. for (j = i + 1; j < mapnum; j++)
  1171. {
  1172. int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
  1173. if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
  1174. && (!has_nullptr
  1175. || !GOMP_MAP_POINTER_P (ptr_kind)
  1176. || *(void **) hostaddrs[j] == NULL))
  1177. break;
  1178. else if ((uintptr_t) hostaddrs[j] < k->host_start
  1179. || ((uintptr_t) hostaddrs[j] + sizeof (void *)
  1180. > k->host_end))
  1181. break;
  1182. else
  1183. {
  1184. if (*(void **) hostaddrs[j] == NULL)
  1185. tgt->list[i].has_null_ptr_assoc = true;
  1186. tgt->list[j].key = k;
  1187. tgt->list[j].copy_from = false;
  1188. tgt->list[j].always_copy_from = false;
  1189. tgt->list[j].is_attach = false;
  1190. gomp_increment_refcount (k, refcount_set);
  1191. gomp_map_pointer (k->tgt, aq,
  1192. (uintptr_t) *(void **) hostaddrs[j],
  1193. k->tgt_offset + ((uintptr_t) hostaddrs[j]
  1194. - k->host_start),
  1195. sizes[j], cbufp, false);
  1196. }
  1197. }
  1198. i = j - 1;
  1199. }
  1200. else if (tgt->list[i].key == NULL)
  1201. {
  1202. int kind = get_kind (short_mapkind, kinds, i);
  1203. bool implicit = get_implicit (short_mapkind, kinds, i);
  1204. if (hostaddrs[i] == NULL)
  1205. continue;
  1206. switch (kind & typemask)
  1207. {
  1208. size_t align, len, first, last;
  1209. splay_tree_key n;
  1210. case GOMP_MAP_FIRSTPRIVATE:
  1211. align = (size_t) 1 << (kind >> rshift);
  1212. tgt_size = (tgt_size + align - 1) & ~(align - 1);
  1213. tgt->list[i].offset = tgt_size;
  1214. len = sizes[i];
  1215. gomp_copy_host2dev (devicep, aq,
  1216. (void *) (tgt->tgt_start + tgt_size),
  1217. (void *) hostaddrs[i], len, false, cbufp);
  1218. tgt_size += len;
  1219. continue;
  1220. case GOMP_MAP_FIRSTPRIVATE_INT:
  1221. case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
  1222. continue;
  1223. case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
  1224. /* The OpenACC 'host_data' construct only allows 'use_device'
  1225. "mapping" clauses, so in the first loop, 'not_found_cnt'
  1226. must always have been zero, so all OpenACC 'use_device'
  1227. clauses have already been handled. (We can only easily test
  1228. 'use_device' with 'if_present' clause here.) */
  1229. assert (tgt->list[i].offset == OFFSET_INLINED);
  1230. /* Nevertheless, FALLTHRU to the normal handling, to keep the
  1231. code conceptually simple, similar to the first loop. */
  1232. case GOMP_MAP_USE_DEVICE_PTR:
  1233. if (tgt->list[i].offset == 0)
  1234. {
  1235. cur_node.host_start = (uintptr_t) hostaddrs[i];
  1236. cur_node.host_end = cur_node.host_start;
  1237. n = gomp_map_lookup (mem_map, &cur_node);
  1238. if (n != NULL)
  1239. {
  1240. cur_node.host_start -= n->host_start;
  1241. hostaddrs[i]
  1242. = (void *) (n->tgt->tgt_start + n->tgt_offset
  1243. + cur_node.host_start);
  1244. }
  1245. else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
  1246. {
  1247. gomp_mutex_unlock (&devicep->lock);
  1248. gomp_fatal ("use_device_ptr pointer wasn't mapped");
  1249. }
  1250. else if ((kind & typemask)
  1251. == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
  1252. /* If not present, continue using the host address. */
  1253. ;
  1254. else
  1255. __builtin_unreachable ();
  1256. tgt->list[i].offset = OFFSET_INLINED;
  1257. }
  1258. continue;
  1259. case GOMP_MAP_STRUCT:
  1260. first = i + 1;
  1261. last = i + sizes[i];
  1262. cur_node.host_start = (uintptr_t) hostaddrs[i];
  1263. cur_node.host_end = (uintptr_t) hostaddrs[last]
  1264. + sizes[last];
  1265. if (tgt->list[first].key != NULL)
  1266. continue;
  1267. n = splay_tree_lookup (mem_map, &cur_node);
  1268. if (n == NULL)
  1269. {
  1270. size_t align = (size_t) 1 << (kind >> rshift);
  1271. tgt_size -= (uintptr_t) hostaddrs[first]
  1272. - (uintptr_t) hostaddrs[i];
  1273. tgt_size = (tgt_size + align - 1) & ~(align - 1);
  1274. tgt_size += (uintptr_t) hostaddrs[first]
  1275. - (uintptr_t) hostaddrs[i];
  1276. field_tgt_base = (uintptr_t) hostaddrs[first];
  1277. field_tgt_offset = tgt_size;
  1278. field_tgt_clear = last;
  1279. field_tgt_structelem_first = NULL;
  1280. tgt_size += cur_node.host_end
  1281. - (uintptr_t) hostaddrs[first];
  1282. continue;
  1283. }
  1284. for (i = first; i <= last; i++)
  1285. gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
  1286. sizes, kinds, cbufp, refcount_set);
  1287. i--;
  1288. continue;
  1289. case GOMP_MAP_ALWAYS_POINTER:
  1290. cur_node.host_start = (uintptr_t) hostaddrs[i];
  1291. cur_node.host_end = cur_node.host_start + sizeof (void *);
  1292. n = splay_tree_lookup (mem_map, &cur_node);
  1293. if (n == NULL
  1294. || n->host_start > cur_node.host_start
  1295. || n->host_end < cur_node.host_end)
  1296. {
  1297. gomp_mutex_unlock (&devicep->lock);
  1298. gomp_fatal ("always pointer not mapped");
  1299. }
  1300. if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
  1301. != GOMP_MAP_ALWAYS_POINTER)
  1302. cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
  1303. if (cur_node.tgt_offset)
  1304. cur_node.tgt_offset -= sizes[i];
  1305. gomp_copy_host2dev (devicep, aq,
  1306. (void *) (n->tgt->tgt_start
  1307. + n->tgt_offset
  1308. + cur_node.host_start
  1309. - n->host_start),
  1310. (void *) &cur_node.tgt_offset,
  1311. sizeof (void *), true, cbufp);
  1312. cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
  1313. + cur_node.host_start - n->host_start;
  1314. continue;
  1315. case GOMP_MAP_IF_PRESENT:
  1316. /* Not present - otherwise handled above. Skip over its
  1317. MAP_POINTER as well. */
  1318. if (i + 1 < mapnum
  1319. && ((typemask & get_kind (short_mapkind, kinds, i + 1))
  1320. == GOMP_MAP_POINTER))
  1321. ++i;
  1322. continue;
  1323. case GOMP_MAP_ATTACH:
  1324. case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
  1325. {
  1326. cur_node.host_start = (uintptr_t) hostaddrs[i];
  1327. cur_node.host_end = cur_node.host_start + sizeof (void *);
  1328. splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
  1329. if (n != NULL)
  1330. {
  1331. tgt->list[i].key = n;
  1332. tgt->list[i].offset = cur_node.host_start - n->host_start;
  1333. tgt->list[i].length = n->host_end - n->host_start;
  1334. tgt->list[i].copy_from = false;
  1335. tgt->list[i].always_copy_from = false;
  1336. tgt->list[i].is_attach = true;
  1337. /* OpenACC 'attach'/'detach' doesn't affect
  1338. structured/dynamic reference counts ('n->refcount',
  1339. 'n->dynamic_refcount'). */
  1340. bool zlas
  1341. = ((kind & typemask)
  1342. == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
  1343. gomp_attach_pointer (devicep, aq, mem_map, n,
  1344. (uintptr_t) hostaddrs[i], sizes[i],
  1345. cbufp, zlas);
  1346. }
  1347. else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
  1348. {
  1349. gomp_mutex_unlock (&devicep->lock);
  1350. gomp_fatal ("outer struct not mapped for attach");
  1351. }
  1352. continue;
  1353. }
  1354. default:
  1355. break;
  1356. }
  1357. splay_tree_key k = &array->key;
  1358. k->host_start = (uintptr_t) hostaddrs[i];
  1359. if (!GOMP_MAP_POINTER_P (kind & typemask))
  1360. k->host_end = k->host_start + sizes[i];
  1361. else
  1362. k->host_end = k->host_start + sizeof (void *);
  1363. splay_tree_key n = splay_tree_lookup (mem_map, k);
  1364. if (n && n->refcount != REFCOUNT_LINK)
  1365. gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
  1366. kind & typemask, false, implicit, cbufp,
  1367. refcount_set);
  1368. else
  1369. {
  1370. k->aux = NULL;
  1371. if (n && n->refcount == REFCOUNT_LINK)
  1372. {
  1373. /* Replace target address of the pointer with target address
  1374. of mapped object in the splay tree. */
  1375. splay_tree_remove (mem_map, n);
  1376. k->aux
  1377. = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
  1378. k->aux->link_key = n;
  1379. }
  1380. size_t align = (size_t) 1 << (kind >> rshift);
  1381. tgt->list[i].key = k;
  1382. k->tgt = tgt;
  1383. k->refcount = 0;
  1384. k->dynamic_refcount = 0;
  1385. if (field_tgt_clear != FIELD_TGT_EMPTY)
  1386. {
  1387. k->tgt_offset = k->host_start - field_tgt_base
  1388. + field_tgt_offset;
  1389. if (openmp_p)
  1390. {
  1391. k->refcount = REFCOUNT_STRUCTELEM;
  1392. if (field_tgt_structelem_first == NULL)
  1393. {
  1394. /* Set to first structure element of sequence. */
  1395. k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
  1396. field_tgt_structelem_first = k;
  1397. }
  1398. else
  1399. /* Point to refcount of leading element, but do not
  1400. increment again. */
  1401. k->structelem_refcount_ptr
  1402. = &field_tgt_structelem_first->structelem_refcount;
  1403. if (i == field_tgt_clear)
  1404. {
  1405. k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
  1406. field_tgt_structelem_first = NULL;
  1407. }
  1408. }
  1409. if (i == field_tgt_clear)
  1410. field_tgt_clear = FIELD_TGT_EMPTY;
  1411. }
  1412. else
  1413. {
  1414. tgt_size = (tgt_size + align - 1) & ~(align - 1);
  1415. k->tgt_offset = tgt_size;
  1416. tgt_size += k->host_end - k->host_start;
  1417. }
  1418. /* First increment, from 0 to 1. gomp_increment_refcount
  1419. encapsulates the different increment cases, so use this
  1420. instead of directly setting 1 during initialization. */
  1421. gomp_increment_refcount (k, refcount_set);
  1422. tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
  1423. tgt->list[i].always_copy_from
  1424. = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
  1425. tgt->list[i].is_attach = false;
  1426. tgt->list[i].offset = 0;
  1427. tgt->list[i].length = k->host_end - k->host_start;
  1428. tgt->refcount++;
  1429. array->left = NULL;
  1430. array->right = NULL;
  1431. splay_tree_insert (mem_map, array);
  1432. switch (kind & typemask)
  1433. {
  1434. case GOMP_MAP_ALLOC:
  1435. case GOMP_MAP_FROM:
  1436. case GOMP_MAP_FORCE_ALLOC:
  1437. case GOMP_MAP_FORCE_FROM:
  1438. case GOMP_MAP_ALWAYS_FROM:
  1439. break;
  1440. case GOMP_MAP_TO:
  1441. case GOMP_MAP_TOFROM:
  1442. case GOMP_MAP_FORCE_TO:
  1443. case GOMP_MAP_FORCE_TOFROM:
  1444. case GOMP_MAP_ALWAYS_TO:
  1445. case GOMP_MAP_ALWAYS_TOFROM:
  1446. gomp_copy_host2dev (devicep, aq,
  1447. (void *) (tgt->tgt_start
  1448. + k->tgt_offset),
  1449. (void *) k->host_start,
  1450. k->host_end - k->host_start,
  1451. false, cbufp);
  1452. break;
  1453. case GOMP_MAP_POINTER:
  1454. case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
  1455. gomp_map_pointer
  1456. (tgt, aq, (uintptr_t) *(void **) k->host_start,
  1457. k->tgt_offset, sizes[i], cbufp,
  1458. ((kind & typemask)
  1459. == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
  1460. break;
  1461. case GOMP_MAP_TO_PSET:
  1462. gomp_copy_host2dev (devicep, aq,
  1463. (void *) (tgt->tgt_start
  1464. + k->tgt_offset),
  1465. (void *) k->host_start,
  1466. k->host_end - k->host_start,
  1467. false, cbufp);
  1468. tgt->list[i].has_null_ptr_assoc = false;
  1469. for (j = i + 1; j < mapnum; j++)
  1470. {
  1471. int ptr_kind = (get_kind (short_mapkind, kinds, j)
  1472. & typemask);
  1473. if (!GOMP_MAP_POINTER_P (ptr_kind)
  1474. && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
  1475. break;
  1476. else if ((uintptr_t) hostaddrs[j] < k->host_start
  1477. || ((uintptr_t) hostaddrs[j] + sizeof (void *)
  1478. > k->host_end))
  1479. break;
  1480. else
  1481. {
  1482. tgt->list[j].key = k;
  1483. tgt->list[j].copy_from = false;
  1484. tgt->list[j].always_copy_from = false;
  1485. tgt->list[j].is_attach = false;
  1486. tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
  1487. /* For OpenMP, the use of refcount_sets causes
  1488. errors if we set k->refcount = 1 above but also
  1489. increment it again here, for decrementing will
  1490. not properly match, since we decrement only once
  1491. for each key's refcount. Therefore avoid this
  1492. increment for OpenMP constructs. */
  1493. if (!openmp_p)
  1494. gomp_increment_refcount (k, refcount_set);
  1495. gomp_map_pointer (tgt, aq,
  1496. (uintptr_t) *(void **) hostaddrs[j],
  1497. k->tgt_offset
  1498. + ((uintptr_t) hostaddrs[j]
  1499. - k->host_start),
  1500. sizes[j], cbufp, false);
  1501. }
  1502. }
  1503. i = j - 1;
  1504. break;
  1505. case GOMP_MAP_FORCE_PRESENT:
  1506. {
  1507. /* We already looked up the memory region above and it
  1508. was missing. */
  1509. size_t size = k->host_end - k->host_start;
  1510. gomp_mutex_unlock (&devicep->lock);
  1511. #ifdef HAVE_INTTYPES_H
  1512. gomp_fatal ("present clause: !acc_is_present (%p, "
  1513. "%"PRIu64" (0x%"PRIx64"))",
  1514. (void *) k->host_start,
  1515. (uint64_t) size, (uint64_t) size);
  1516. #else
  1517. gomp_fatal ("present clause: !acc_is_present (%p, "
  1518. "%lu (0x%lx))", (void *) k->host_start,
  1519. (unsigned long) size, (unsigned long) size);
  1520. #endif
  1521. }
  1522. break;
  1523. case GOMP_MAP_FORCE_DEVICEPTR:
  1524. assert (k->host_end - k->host_start == sizeof (void *));
  1525. gomp_copy_host2dev (devicep, aq,
  1526. (void *) (tgt->tgt_start
  1527. + k->tgt_offset),
  1528. (void *) k->host_start,
  1529. sizeof (void *), false, cbufp);
  1530. break;
  1531. default:
  1532. gomp_mutex_unlock (&devicep->lock);
  1533. gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
  1534. kind);
  1535. }
  1536. if (k->aux && k->aux->link_key)
  1537. {
  1538. /* Set link pointer on target to the device address of the
  1539. mapped object. */
  1540. void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
  1541. /* We intentionally do not use coalescing here, as it's not
  1542. data allocated by the current call to this function. */
  1543. gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
  1544. &tgt_addr, sizeof (void *), true, NULL);
  1545. }
  1546. array++;
  1547. }
  1548. }
  1549. }
  1550. if (pragma_kind == GOMP_MAP_VARS_TARGET)
  1551. {
  1552. for (i = 0; i < mapnum; i++)
  1553. {
  1554. cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
  1555. gomp_copy_host2dev (devicep, aq,
  1556. (void *) (tgt->tgt_start + i * sizeof (void *)),
  1557. (void *) &cur_node.tgt_offset, sizeof (void *),
  1558. true, cbufp);
  1559. }
  1560. }
  1561. if (cbufp)
  1562. {
  1563. /* See 'gomp_coalesce_buf_add'. */
  1564. assert (!aq);
  1565. long c = 0;
  1566. for (c = 0; c < cbuf.chunk_cnt; ++c)
  1567. gomp_copy_host2dev (devicep, aq,
  1568. (void *) (tgt->tgt_start + cbuf.chunks[c].start),
  1569. (char *) cbuf.buf + (cbuf.chunks[c].start
  1570. - cbuf.chunks[0].start),
  1571. cbuf.chunks[c].end - cbuf.chunks[c].start,
  1572. true, NULL);
  1573. free (cbuf.buf);
  1574. cbuf.buf = NULL;
  1575. cbufp = NULL;
  1576. }
  1577. /* If the variable from "omp target enter data" map-list was already mapped,
  1578. tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
  1579. gomp_exit_data. */
  1580. if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
  1581. {
  1582. free (tgt);
  1583. tgt = NULL;
  1584. }
  1585. gomp_mutex_unlock (&devicep->lock);
  1586. return tgt;
  1587. }
  1588. static struct target_mem_desc *
  1589. gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
  1590. void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
  1591. bool short_mapkind, htab_t *refcount_set,
  1592. enum gomp_map_vars_kind pragma_kind)
  1593. {
  1594. /* This management of a local refcount_set is for convenience of callers
  1595. who do not share a refcount_set over multiple map/unmap uses. */
  1596. htab_t local_refcount_set = NULL;
  1597. if (refcount_set == NULL)
  1598. {
  1599. local_refcount_set = htab_create (mapnum);
  1600. refcount_set = &local_refcount_set;
  1601. }
  1602. struct target_mem_desc *tgt;
  1603. tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
  1604. sizes, kinds, short_mapkind, refcount_set,
  1605. pragma_kind);
  1606. if (local_refcount_set)
  1607. htab_free (local_refcount_set);
  1608. return tgt;
  1609. }
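/* Illustrative sketch, not part of the original libgomp source: the two ways
   callers typically drive gomp_map_vars.  A caller that pairs the mapping
   with a later gomp_unmap_vars shares one refcount_set across both calls
   (this mirrors the pattern used by GOMP_target_ext further below); a caller
   that passes NULL gets a throw-away set created and freed internally, as
   above.  'devicep', 'mapnum', 'hostaddrs', 'sizes' and 'kinds' stand for
   whatever the caller already has at hand.

     htab_t refcount_set = htab_create (mapnum);
     struct target_mem_desc *tgt_vars
       = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                        true, &refcount_set, GOMP_MAP_VARS_TARGET);
     ... run the target region ...
     htab_clear (refcount_set);
     gomp_unmap_vars (tgt_vars, true, &refcount_set);
     htab_free (refcount_set);  */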
  1610. attribute_hidden struct target_mem_desc *
  1611. goacc_map_vars (struct gomp_device_descr *devicep,
  1612. struct goacc_asyncqueue *aq, size_t mapnum,
  1613. void **hostaddrs, void **devaddrs, size_t *sizes,
  1614. void *kinds, bool short_mapkind,
  1615. enum gomp_map_vars_kind pragma_kind)
  1616. {
  1617. return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
  1618. sizes, kinds, short_mapkind, NULL,
  1619. GOMP_MAP_VARS_OPENACC | pragma_kind);
  1620. }
  1621. static void
  1622. gomp_unmap_tgt (struct target_mem_desc *tgt)
  1623. {
  1624. /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
  1625. if (tgt->tgt_end)
  1626. gomp_free_device_memory (tgt->device_descr, tgt->to_free);
  1627. free (tgt->array);
  1628. free (tgt);
  1629. }
  1630. static bool
  1631. gomp_unref_tgt (void *ptr)
  1632. {
  1633. bool is_tgt_unmapped = false;
  1634. struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
  1635. if (tgt->refcount > 1)
  1636. tgt->refcount--;
  1637. else
  1638. {
  1639. gomp_unmap_tgt (tgt);
  1640. is_tgt_unmapped = true;
  1641. }
  1642. return is_tgt_unmapped;
  1643. }
  1644. static void
  1645. gomp_unref_tgt_void (void *ptr)
  1646. {
  1647. (void) gomp_unref_tgt (ptr);
  1648. }
  1649. static void
  1650. gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
  1651. {
  1652. splay_tree_remove (sp, k);
  1653. if (k->aux)
  1654. {
  1655. if (k->aux->link_key)
  1656. splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
  1657. if (k->aux->attach_count)
  1658. free (k->aux->attach_count);
  1659. free (k->aux);
  1660. k->aux = NULL;
  1661. }
  1662. }
  1663. static inline __attribute__((always_inline)) bool
  1664. gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
  1665. struct goacc_asyncqueue *aq)
  1666. {
  1667. bool is_tgt_unmapped = false;
  1668. if (REFCOUNT_STRUCTELEM_P (k->refcount))
  1669. {
  1670. if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
  1671. /* Infer the splay_tree_key of the first structelem key using the
1672. pointer to the first structelem_refcount. */
  1673. k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
  1674. - offsetof (struct splay_tree_key_s,
  1675. structelem_refcount));
  1676. assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
  1677. /* The array created by gomp_map_vars is an array of splay_tree_nodes,
  1678. with the splay_tree_keys embedded inside. */
  1679. splay_tree_node node =
  1680. (splay_tree_node) ((char *) k
  1681. - offsetof (struct splay_tree_node_s, key));
  1682. while (true)
  1683. {
1684. /* Starting from the _FIRST key, continue for all following
  1685. sibling keys. */
  1686. gomp_remove_splay_tree_key (&devicep->mem_map, k);
  1687. if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
  1688. break;
  1689. else
  1690. k = &(++node)->key;
  1691. }
  1692. }
  1693. else
  1694. gomp_remove_splay_tree_key (&devicep->mem_map, k);
  1695. if (aq)
  1696. devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
  1697. (void *) k->tgt);
  1698. else
  1699. is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  1700. return is_tgt_unmapped;
  1701. }
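/* A minimal sketch of the idiom used above, not part of the original source:
   recovering the containing object from a pointer to one of its members is
   the classic offsetof/"container_of" trick.  A generic (hypothetical) helper
   would look like this; gomp_remove_var_internal open-codes the same
   arithmetic for splay_tree_key_s and splay_tree_node_s.

     #include <stddef.h>
     #define CONTAINER_OF(ptr, type, member) \
       ((type *) ((char *) (ptr) - offsetof (type, member)))

     splay_tree_key first
       = CONTAINER_OF (k->structelem_refcount_ptr,
                       struct splay_tree_key_s, structelem_refcount);  */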
  1702. attribute_hidden bool
  1703. gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
  1704. {
  1705. return gomp_remove_var_internal (devicep, k, NULL);
  1706. }
  1707. /* Remove a variable asynchronously. This actually removes the variable
  1708. mapping immediately, but retains the linked target_mem_desc until the
  1709. asynchronous operation has completed (as it may still refer to target
  1710. memory). The device lock must be held before entry, and remains locked on
  1711. exit. */
  1712. attribute_hidden void
  1713. gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
  1714. struct goacc_asyncqueue *aq)
  1715. {
  1716. (void) gomp_remove_var_internal (devicep, k, aq);
  1717. }
  1718. /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1719. variables back from device to host; if it is false, it is assumed that this
  1720. has been done already. */
  1721. static inline __attribute__((always_inline)) void
  1722. gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
  1723. htab_t *refcount_set, struct goacc_asyncqueue *aq)
  1724. {
  1725. struct gomp_device_descr *devicep = tgt->device_descr;
  1726. if (tgt->list_count == 0)
  1727. {
  1728. free (tgt);
  1729. return;
  1730. }
  1731. gomp_mutex_lock (&devicep->lock);
  1732. if (devicep->state == GOMP_DEVICE_FINALIZED)
  1733. {
  1734. gomp_mutex_unlock (&devicep->lock);
  1735. free (tgt->array);
  1736. free (tgt);
  1737. return;
  1738. }
  1739. size_t i;
  1740. /* We must perform detachments before any copies back to the host. */
  1741. for (i = 0; i < tgt->list_count; i++)
  1742. {
  1743. splay_tree_key k = tgt->list[i].key;
  1744. if (k != NULL && tgt->list[i].is_attach)
  1745. gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
  1746. + tgt->list[i].offset,
  1747. false, NULL);
  1748. }
  1749. for (i = 0; i < tgt->list_count; i++)
  1750. {
  1751. splay_tree_key k = tgt->list[i].key;
  1752. if (k == NULL)
  1753. continue;
  1754. /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
  1755. counts ('n->refcount', 'n->dynamic_refcount'). */
  1756. if (tgt->list[i].is_attach)
  1757. continue;
  1758. bool do_copy, do_remove;
  1759. gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
  1760. if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
  1761. || tgt->list[i].always_copy_from)
  1762. gomp_copy_dev2host (devicep, aq,
  1763. (void *) (k->host_start + tgt->list[i].offset),
  1764. (void *) (k->tgt->tgt_start + k->tgt_offset
  1765. + tgt->list[i].offset),
  1766. tgt->list[i].length);
  1767. if (do_remove)
  1768. {
  1769. struct target_mem_desc *k_tgt = k->tgt;
  1770. bool is_tgt_unmapped = gomp_remove_var (devicep, k);
  1771. /* It would be bad if TGT got unmapped while we're still iterating
  1772. over its LIST_COUNT, and also expect to use it in the following
  1773. code. */
  1774. assert (!is_tgt_unmapped
  1775. || k_tgt != tgt);
  1776. }
  1777. }
  1778. if (aq)
  1779. devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
  1780. (void *) tgt);
  1781. else
  1782. gomp_unref_tgt ((void *) tgt);
  1783. gomp_mutex_unlock (&devicep->lock);
  1784. }
  1785. static void
  1786. gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
  1787. htab_t *refcount_set)
  1788. {
  1789. /* This management of a local refcount_set is for convenience of callers
  1790. who do not share a refcount_set over multiple map/unmap uses. */
  1791. htab_t local_refcount_set = NULL;
  1792. if (refcount_set == NULL)
  1793. {
  1794. local_refcount_set = htab_create (tgt->list_count);
  1795. refcount_set = &local_refcount_set;
  1796. }
  1797. gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
  1798. if (local_refcount_set)
  1799. htab_free (local_refcount_set);
  1800. }
  1801. attribute_hidden void
  1802. goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
  1803. struct goacc_asyncqueue *aq)
  1804. {
  1805. gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
  1806. }
  1807. static void
  1808. gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
  1809. size_t *sizes, void *kinds, bool short_mapkind)
  1810. {
  1811. size_t i;
  1812. struct splay_tree_key_s cur_node;
  1813. const int typemask = short_mapkind ? 0xff : 0x7;
  1814. if (!devicep)
  1815. return;
  1816. if (mapnum == 0)
  1817. return;
  1818. gomp_mutex_lock (&devicep->lock);
  1819. if (devicep->state == GOMP_DEVICE_FINALIZED)
  1820. {
  1821. gomp_mutex_unlock (&devicep->lock);
  1822. return;
  1823. }
  1824. for (i = 0; i < mapnum; i++)
  1825. if (sizes[i])
  1826. {
  1827. cur_node.host_start = (uintptr_t) hostaddrs[i];
  1828. cur_node.host_end = cur_node.host_start + sizes[i];
  1829. splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
  1830. if (n)
  1831. {
  1832. int kind = get_kind (short_mapkind, kinds, i);
  1833. if (n->host_start > cur_node.host_start
  1834. || n->host_end < cur_node.host_end)
  1835. {
  1836. gomp_mutex_unlock (&devicep->lock);
  1837. gomp_fatal ("Trying to update [%p..%p) object when "
  1838. "only [%p..%p) is mapped",
  1839. (void *) cur_node.host_start,
  1840. (void *) cur_node.host_end,
  1841. (void *) n->host_start,
  1842. (void *) n->host_end);
  1843. }
  1844. if (n->aux && n->aux->attach_count)
  1845. {
  1846. uintptr_t addr = cur_node.host_start;
  1847. while (addr < cur_node.host_end)
  1848. {
  1849. /* We have to be careful not to overwrite still attached
  1850. pointers during host<->device updates. */
  1851. size_t i = (addr - cur_node.host_start) / sizeof (void *);
  1852. if (n->aux->attach_count[i] == 0)
  1853. {
  1854. void *devaddr = (void *) (n->tgt->tgt_start
  1855. + n->tgt_offset
  1856. + addr - n->host_start);
  1857. if (GOMP_MAP_COPY_TO_P (kind & typemask))
  1858. gomp_copy_host2dev (devicep, NULL,
  1859. devaddr, (void *) addr,
  1860. sizeof (void *), false, NULL);
  1861. if (GOMP_MAP_COPY_FROM_P (kind & typemask))
  1862. gomp_copy_dev2host (devicep, NULL,
  1863. (void *) addr, devaddr,
  1864. sizeof (void *));
  1865. }
  1866. addr += sizeof (void *);
  1867. }
  1868. }
  1869. else
  1870. {
  1871. void *hostaddr = (void *) cur_node.host_start;
  1872. void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
  1873. + cur_node.host_start
  1874. - n->host_start);
  1875. size_t size = cur_node.host_end - cur_node.host_start;
  1876. if (GOMP_MAP_COPY_TO_P (kind & typemask))
  1877. gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
  1878. false, NULL);
  1879. if (GOMP_MAP_COPY_FROM_P (kind & typemask))
  1880. gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
  1881. }
  1882. }
  1883. }
  1884. gomp_mutex_unlock (&devicep->lock);
  1885. }
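/* Illustrative sketch, not part of the original source: gomp_update is the
   data-movement engine behind the OpenMP 'target update' directive.  User
   code such as the following is lowered by GCC into calls to the
   GOMP_target_update{,_ext} entry points further below, which forward the
   address/size/kind arrays to gomp_update.

     enum { N = 1024 };
     double x[N];
     #pragma omp target enter data map(to: x)
     x[0] = 42.0;                        // modified on the host
     #pragma omp target update to(x)     // refresh the device copy
     #pragma omp target update from(x)   // copy the device copy back
     #pragma omp target exit data map(delete: x)  */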
1886. /* Load the image pointed to by TARGET_DATA to the device specified by DEVICEP,
1887. and insert into the splay tree the mappings between addresses from HOST_TABLE
1888. and from the loaded target image. We rely on the host and device compilers
1889. emitting variables and functions in the same order. */
  1890. static void
  1891. gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
  1892. const void *host_table, const void *target_data,
  1893. bool is_register_lock)
  1894. {
  1895. void **host_func_table = ((void ***) host_table)[0];
  1896. void **host_funcs_end = ((void ***) host_table)[1];
  1897. void **host_var_table = ((void ***) host_table)[2];
  1898. void **host_vars_end = ((void ***) host_table)[3];
  1899. /* The func table contains only addresses, the var table contains addresses
  1900. and corresponding sizes. */
  1901. int num_funcs = host_funcs_end - host_func_table;
  1902. int num_vars = (host_vars_end - host_var_table) / 2;
1903. /* Others currently consist only of 'device_num'. */
  1904. int num_others = 1;
  1905. /* Load image to device and get target addresses for the image. */
  1906. struct addr_pair *target_table = NULL;
  1907. int i, num_target_entries;
  1908. num_target_entries
  1909. = devicep->load_image_func (devicep->target_id, version,
  1910. target_data, &target_table);
  1911. if (num_target_entries != num_funcs + num_vars
  1912. /* Others (device_num) are included as trailing entries in pair list. */
  1913. && num_target_entries != num_funcs + num_vars + num_others)
  1914. {
  1915. gomp_mutex_unlock (&devicep->lock);
  1916. if (is_register_lock)
  1917. gomp_mutex_unlock (&register_lock);
  1918. gomp_fatal ("Cannot map target functions or variables"
  1919. " (expected %u, have %u)", num_funcs + num_vars,
  1920. num_target_entries);
  1921. }
  1922. /* Insert host-target address mapping into splay tree. */
  1923. struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  1924. tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  1925. tgt->refcount = REFCOUNT_INFINITY;
  1926. tgt->tgt_start = 0;
  1927. tgt->tgt_end = 0;
  1928. tgt->to_free = NULL;
  1929. tgt->prev = NULL;
  1930. tgt->list_count = 0;
  1931. tgt->device_descr = devicep;
  1932. splay_tree_node array = tgt->array;
  1933. for (i = 0; i < num_funcs; i++)
  1934. {
  1935. splay_tree_key k = &array->key;
  1936. k->host_start = (uintptr_t) host_func_table[i];
  1937. k->host_end = k->host_start + 1;
  1938. k->tgt = tgt;
  1939. k->tgt_offset = target_table[i].start;
  1940. k->refcount = REFCOUNT_INFINITY;
  1941. k->dynamic_refcount = 0;
  1942. k->aux = NULL;
  1943. array->left = NULL;
  1944. array->right = NULL;
  1945. splay_tree_insert (&devicep->mem_map, array);
  1946. array++;
  1947. }
  1948. /* Most significant bit of the size in host and target tables marks
  1949. "omp declare target link" variables. */
  1950. const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  1951. const uintptr_t size_mask = ~link_bit;
  1952. for (i = 0; i < num_vars; i++)
  1953. {
  1954. struct addr_pair *target_var = &target_table[num_funcs + i];
  1955. uintptr_t target_size = target_var->end - target_var->start;
  1956. bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
  1957. if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
  1958. {
  1959. gomp_mutex_unlock (&devicep->lock);
  1960. if (is_register_lock)
  1961. gomp_mutex_unlock (&register_lock);
  1962. gomp_fatal ("Cannot map target variables (size mismatch)");
  1963. }
  1964. splay_tree_key k = &array->key;
  1965. k->host_start = (uintptr_t) host_var_table[i * 2];
  1966. k->host_end
  1967. = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
  1968. k->tgt = tgt;
  1969. k->tgt_offset = target_var->start;
  1970. k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
  1971. k->dynamic_refcount = 0;
  1972. k->aux = NULL;
  1973. array->left = NULL;
  1974. array->right = NULL;
  1975. splay_tree_insert (&devicep->mem_map, array);
  1976. array++;
  1977. }
  1978. /* Last entry is for the on-device 'device_num' variable. Tolerate case
  1979. where plugin does not return this entry. */
  1980. if (num_funcs + num_vars < num_target_entries)
  1981. {
  1982. struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
  1983. /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
  1984. was found in this image. */
  1985. if (device_num_var->start != 0)
  1986. {
  1987. /* The index of the devicep within devices[] is regarded as its
  1988. 'device number', which is different from the per-device type
  1989. devicep->target_id. */
  1990. int device_num_val = (int) (devicep - &devices[0]);
  1991. if (device_num_var->end - device_num_var->start != sizeof (int))
  1992. {
  1993. gomp_mutex_unlock (&devicep->lock);
  1994. if (is_register_lock)
  1995. gomp_mutex_unlock (&register_lock);
  1996. gomp_fatal ("offload plugin managed 'device_num' not of expected "
  1997. "format");
  1998. }
1999. /* Copy the device_num value to its place in device memory, thereby
2000. putting the device number into effect. */
  2001. gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
  2002. &device_num_val, sizeof (int), false, NULL);
  2003. }
  2004. }
  2005. free (target_table);
  2006. }
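/* A hypothetical view of the HOST_TABLE blob decoded above, added for
   clarity and not part of the original source: it consists of four pointers,
   the begin/end of the host function table and the begin/end of the host
   variable table.  The variable table stores interleaved (address, size)
   pairs, and the most significant bit of each size marks an "omp declare
   target link" variable.

     struct host_table_view
     {
       void **funcs_begin, **funcs_end;  // addresses of host functions
       void **vars_begin, **vars_end;    // interleaved (address, size) pairs
     };

   so num_funcs == funcs_end - funcs_begin and
   num_vars == (vars_end - vars_begin) / 2, exactly as computed above.  */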
2007. /* Unload the mappings described by TARGET_DATA from device DEVICEP.
  2008. The device must be locked. */
  2009. static void
  2010. gomp_unload_image_from_device (struct gomp_device_descr *devicep,
  2011. unsigned version,
  2012. const void *host_table, const void *target_data)
  2013. {
  2014. void **host_func_table = ((void ***) host_table)[0];
  2015. void **host_funcs_end = ((void ***) host_table)[1];
  2016. void **host_var_table = ((void ***) host_table)[2];
  2017. void **host_vars_end = ((void ***) host_table)[3];
  2018. /* The func table contains only addresses, the var table contains addresses
  2019. and corresponding sizes. */
  2020. int num_funcs = host_funcs_end - host_func_table;
  2021. int num_vars = (host_vars_end - host_var_table) / 2;
  2022. struct splay_tree_key_s k;
  2023. splay_tree_key node = NULL;
2024. /* Find the mapping at the start of the node array. */
  2025. if (num_funcs || num_vars)
  2026. {
  2027. k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
  2028. : (uintptr_t) host_var_table[0]);
  2029. k.host_end = k.host_start + 1;
  2030. node = splay_tree_lookup (&devicep->mem_map, &k);
  2031. }
  2032. if (!devicep->unload_image_func (devicep->target_id, version, target_data))
  2033. {
  2034. gomp_mutex_unlock (&devicep->lock);
  2035. gomp_fatal ("image unload fail");
  2036. }
  2037. /* Remove mappings from splay tree. */
  2038. int i;
  2039. for (i = 0; i < num_funcs; i++)
  2040. {
  2041. k.host_start = (uintptr_t) host_func_table[i];
  2042. k.host_end = k.host_start + 1;
  2043. splay_tree_remove (&devicep->mem_map, &k);
  2044. }
  2045. /* Most significant bit of the size in host and target tables marks
  2046. "omp declare target link" variables. */
  2047. const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  2048. const uintptr_t size_mask = ~link_bit;
  2049. bool is_tgt_unmapped = false;
  2050. for (i = 0; i < num_vars; i++)
  2051. {
  2052. k.host_start = (uintptr_t) host_var_table[i * 2];
  2053. k.host_end
  2054. = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
  2055. if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
  2056. splay_tree_remove (&devicep->mem_map, &k);
  2057. else
  2058. {
  2059. splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
  2060. is_tgt_unmapped = gomp_remove_var (devicep, n);
  2061. }
  2062. }
  2063. if (node && !is_tgt_unmapped)
  2064. {
  2065. free (node->tgt);
  2066. free (node);
  2067. }
  2068. }
  2069. /* This function should be called from every offload image while loading.
  2070. It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
  2071. the target, and TARGET_DATA needed by target plugin. */
  2072. void
  2073. GOMP_offload_register_ver (unsigned version, const void *host_table,
  2074. int target_type, const void *target_data)
  2075. {
  2076. int i;
  2077. if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
  2078. gomp_fatal ("Library too old for offload (version %u < %u)",
  2079. GOMP_VERSION, GOMP_VERSION_LIB (version));
  2080. gomp_mutex_lock (&register_lock);
  2081. /* Load image to all initialized devices. */
  2082. for (i = 0; i < num_devices; i++)
  2083. {
  2084. struct gomp_device_descr *devicep = &devices[i];
  2085. gomp_mutex_lock (&devicep->lock);
  2086. if (devicep->type == target_type
  2087. && devicep->state == GOMP_DEVICE_INITIALIZED)
  2088. gomp_load_image_to_device (devicep, version,
  2089. host_table, target_data, true);
  2090. gomp_mutex_unlock (&devicep->lock);
  2091. }
  2092. /* Insert image to array of pending images. */
  2093. offload_images
  2094. = gomp_realloc_unlock (offload_images,
  2095. (num_offload_images + 1)
  2096. * sizeof (struct offload_image_descr));
  2097. offload_images[num_offload_images].version = version;
  2098. offload_images[num_offload_images].type = target_type;
  2099. offload_images[num_offload_images].host_table = host_table;
  2100. offload_images[num_offload_images].target_data = target_data;
  2101. num_offload_images++;
  2102. gomp_mutex_unlock (&register_lock);
  2103. }
  2104. void
  2105. GOMP_offload_register (const void *host_table, int target_type,
  2106. const void *target_data)
  2107. {
  2108. GOMP_offload_register_ver (0, host_table, target_type, target_data);
  2109. }
  2110. /* This function should be called from every offload image while unloading.
  2111. It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
  2112. the target, and TARGET_DATA needed by target plugin. */
  2113. void
  2114. GOMP_offload_unregister_ver (unsigned version, const void *host_table,
  2115. int target_type, const void *target_data)
  2116. {
  2117. int i;
  2118. gomp_mutex_lock (&register_lock);
  2119. /* Unload image from all initialized devices. */
  2120. for (i = 0; i < num_devices; i++)
  2121. {
  2122. struct gomp_device_descr *devicep = &devices[i];
  2123. gomp_mutex_lock (&devicep->lock);
  2124. if (devicep->type == target_type
  2125. && devicep->state == GOMP_DEVICE_INITIALIZED)
  2126. gomp_unload_image_from_device (devicep, version,
  2127. host_table, target_data);
  2128. gomp_mutex_unlock (&devicep->lock);
  2129. }
  2130. /* Remove image from array of pending images. */
  2131. for (i = 0; i < num_offload_images; i++)
  2132. if (offload_images[i].target_data == target_data)
  2133. {
  2134. offload_images[i] = offload_images[--num_offload_images];
  2135. break;
  2136. }
  2137. gomp_mutex_unlock (&register_lock);
  2138. }
  2139. void
  2140. GOMP_offload_unregister (const void *host_table, int target_type,
  2141. const void *target_data)
  2142. {
  2143. GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
  2144. }
  2145. /* This function initializes the target device, specified by DEVICEP. DEVICEP
  2146. must be locked on entry, and remains locked on return. */
  2147. attribute_hidden void
  2148. gomp_init_device (struct gomp_device_descr *devicep)
  2149. {
  2150. int i;
  2151. if (!devicep->init_device_func (devicep->target_id))
  2152. {
  2153. gomp_mutex_unlock (&devicep->lock);
  2154. gomp_fatal ("device initialization failed");
  2155. }
2156. /* Load to the device all images registered so far. */
  2157. for (i = 0; i < num_offload_images; i++)
  2158. {
  2159. struct offload_image_descr *image = &offload_images[i];
  2160. if (image->type == devicep->type)
  2161. gomp_load_image_to_device (devicep, image->version,
  2162. image->host_table, image->target_data,
  2163. false);
  2164. }
  2165. /* Initialize OpenACC asynchronous queues. */
  2166. goacc_init_asyncqueues (devicep);
  2167. devicep->state = GOMP_DEVICE_INITIALIZED;
  2168. }
  2169. /* This function finalizes the target device, specified by DEVICEP. DEVICEP
  2170. must be locked on entry, and remains locked on return. */
  2171. attribute_hidden bool
  2172. gomp_fini_device (struct gomp_device_descr *devicep)
  2173. {
  2174. bool ret = goacc_fini_asyncqueues (devicep);
  2175. ret &= devicep->fini_device_func (devicep->target_id);
  2176. devicep->state = GOMP_DEVICE_FINALIZED;
  2177. return ret;
  2178. }
  2179. attribute_hidden void
  2180. gomp_unload_device (struct gomp_device_descr *devicep)
  2181. {
  2182. if (devicep->state == GOMP_DEVICE_INITIALIZED)
  2183. {
  2184. unsigned i;
2185. /* Unload from the device all images currently registered. */
  2186. for (i = 0; i < num_offload_images; i++)
  2187. {
  2188. struct offload_image_descr *image = &offload_images[i];
  2189. if (image->type == devicep->type)
  2190. gomp_unload_image_from_device (devicep, image->version,
  2191. image->host_table,
  2192. image->target_data);
  2193. }
  2194. }
  2195. }
  2196. /* Host fallback for GOMP_target{,_ext} routines. */
  2197. static void
  2198. gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
  2199. struct gomp_device_descr *devicep, void **args)
  2200. {
  2201. struct gomp_thread old_thr, *thr = gomp_thread ();
  2202. if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
  2203. && devicep != NULL)
  2204. gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
  2205. "be used for offloading");
  2206. old_thr = *thr;
  2207. memset (thr, '\0', sizeof (*thr));
  2208. if (gomp_places_list)
  2209. {
  2210. thr->place = old_thr.place;
  2211. thr->ts.place_partition_len = gomp_places_list_len;
  2212. }
  2213. if (args)
  2214. while (*args)
  2215. {
  2216. intptr_t id = (intptr_t) *args++, val;
  2217. if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
  2218. val = (intptr_t) *args++;
  2219. else
  2220. val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
  2221. if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
  2222. continue;
  2223. id &= GOMP_TARGET_ARG_ID_MASK;
  2224. if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
  2225. continue;
  2226. val = val > INT_MAX ? INT_MAX : val;
  2227. if (val)
  2228. gomp_icv (true)->thread_limit_var = val;
  2229. break;
  2230. }
  2231. fn (hostaddrs);
  2232. gomp_free_thread (thr);
  2233. *thr = old_thr;
  2234. }
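/* A minimal sketch of the ARGS encoding consumed by the loop above, not part
   of the original source: ARGS is a NULL-terminated array where each entry is
   either fully encoded in one intptr_t (device selector, argument id, and the
   value shifted up by GOMP_TARGET_ARG_VALUE_SHIFT), or, when
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM is set, the value occupies the following
   array element.

     for (void **p = args; p && *p; )
       {
         intptr_t id = (intptr_t) *p++, val;
         if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
           val = (intptr_t) *p++;
         else
           val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
         ... dispatch on (id & GOMP_TARGET_ARG_ID_MASK)
             and (id & GOMP_TARGET_ARG_DEVICE_MASK) ...
       }  */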
  2235. /* Calculate alignment and size requirements of a private copy of data shared
  2236. as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
  2237. static inline void
  2238. calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
  2239. unsigned short *kinds, size_t *tgt_align,
  2240. size_t *tgt_size)
  2241. {
  2242. size_t i;
  2243. for (i = 0; i < mapnum; i++)
  2244. if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
  2245. {
  2246. size_t align = (size_t) 1 << (kinds[i] >> 8);
  2247. if (*tgt_align < align)
  2248. *tgt_align = align;
  2249. *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
  2250. *tgt_size += sizes[i];
  2251. }
  2252. }
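/* Worked example of the alignment arithmetic above, added for clarity (not
   part of the original source): (x + align - 1) & ~(align - 1) rounds x up
   to the next multiple of the power-of-two 'align'.  With two firstprivate
   copies of 13 and 5 bytes, both 8-byte aligned:

     pass 1:  tgt_size = (0 + 7) & ~7  =  0;  tgt_size += 13  ->  13
     pass 2:  tgt_size = (13 + 7) & ~7 = 16;  tgt_size +=  5  ->  21

   so the caller reserves 21 bytes plus 'tgt_align - 1' bytes of slack for
   aligning the start of the block (see GOMP_target_ext below).  */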
  2253. /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
  2254. static inline void
  2255. copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
  2256. size_t *sizes, unsigned short *kinds, size_t tgt_align,
  2257. size_t tgt_size)
  2258. {
  2259. uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  2260. if (al)
  2261. tgt += tgt_align - al;
  2262. tgt_size = 0;
  2263. size_t i;
  2264. for (i = 0; i < mapnum; i++)
  2265. if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
  2266. {
  2267. size_t align = (size_t) 1 << (kinds[i] >> 8);
  2268. tgt_size = (tgt_size + align - 1) & ~(align - 1);
  2269. memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
  2270. hostaddrs[i] = tgt + tgt_size;
  2271. tgt_size = tgt_size + sizes[i];
  2272. }
  2273. }
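/* Illustrative sketch, not part of the original source: the two helpers above
   are used as a pair.  The caller first sizes the private block, then carves
   it out of the stack and copies the data, which also rewrites hostaddrs[]
   to point into the private copies (this is exactly the pattern used by
   GOMP_target_ext below).

     size_t tgt_align = 0, tgt_size = 0;
     calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                          &tgt_align, &tgt_size);
     if (tgt_align)
       {
         char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
         copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                 tgt_align, tgt_size);
       }  */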
  2274. /* Helper function of GOMP_target{,_ext} routines. */
  2275. static void *
  2276. gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
  2277. void (*host_fn) (void *))
  2278. {
  2279. if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
  2280. return (void *) host_fn;
  2281. else
  2282. {
  2283. gomp_mutex_lock (&devicep->lock);
  2284. if (devicep->state == GOMP_DEVICE_FINALIZED)
  2285. {
  2286. gomp_mutex_unlock (&devicep->lock);
  2287. return NULL;
  2288. }
  2289. struct splay_tree_key_s k;
  2290. k.host_start = (uintptr_t) host_fn;
  2291. k.host_end = k.host_start + 1;
  2292. splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
  2293. gomp_mutex_unlock (&devicep->lock);
  2294. if (tgt_fn == NULL)
  2295. return NULL;
  2296. return (void *) tgt_fn->tgt_offset;
  2297. }
  2298. }
  2299. /* Called when encountering a target directive. If DEVICE
  2300. is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
  2301. GOMP_DEVICE_HOST_FALLBACK (or any value
  2302. larger than last available hw device), use host fallback.
  2303. FN is address of host code, UNUSED is part of the current ABI, but
  2304. we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
  2305. with MAPNUM entries, with addresses of the host objects,
  2306. sizes of the host objects (resp. for pointer kind pointer bias
  2307. and assumed sizeof (void *) size) and kinds. */
  2308. void
  2309. GOMP_target (int device, void (*fn) (void *), const void *unused,
  2310. size_t mapnum, void **hostaddrs, size_t *sizes,
  2311. unsigned char *kinds)
  2312. {
  2313. struct gomp_device_descr *devicep = resolve_device (device);
  2314. void *fn_addr;
  2315. if (devicep == NULL
  2316. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
  2317. /* All shared memory devices should use the GOMP_target_ext function. */
  2318. || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
  2319. || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
  2320. return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
  2321. htab_t refcount_set = htab_create (mapnum);
  2322. struct target_mem_desc *tgt_vars
  2323. = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
  2324. &refcount_set, GOMP_MAP_VARS_TARGET);
  2325. devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
  2326. NULL);
  2327. htab_clear (refcount_set);
  2328. gomp_unmap_vars (tgt_vars, true, &refcount_set);
  2329. htab_free (refcount_set);
  2330. }
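/* Illustrative sketch, not part of the original source: a 'target' region
   such as the following is outlined by the compiler into a separate function
   and lowered into a call to GOMP_target (legacy ABI) or GOMP_target_ext
   (current ABI, below), passing the outlined body as FN and one
   HOSTADDRS/SIZES/KINDS entry per map clause.

     int a[1024], b[1024];
     #pragma omp target map(to: a) map(tofrom: b)
     for (int i = 0; i < 1024; i++)
       b[i] += a[i];  */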
  2331. static inline unsigned int
  2332. clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
  2333. {
  2334. /* If we cannot run asynchronously, simply ignore nowait. */
  2335. if (devicep != NULL && devicep->async_run_func == NULL)
  2336. flags &= ~GOMP_TARGET_FLAG_NOWAIT;
  2337. return flags;
  2338. }
  2339. /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
  2340. and several arguments have been added:
  2341. FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2342. DEPEND is an array of dependencies, see GOMP_task for details.
2343. ARGS is a pointer to an array consisting of a variable number of both
2344. device-independent and device-specific arguments; each argument takes one
2345. or two elements, where the first specifies for which device it is intended,
2346. the type, and optionally also the value. If the value is not encoded in the
2347. first element, the whole second element holds the actual value. The last
2348. element of the array is a single NULL. Device-independent arguments include,
2349. for example, NUM_TEAMS and THREAD_LIMIT.
  2350. NUM_TEAMS is positive if GOMP_teams will be called in the body with
  2351. that value, or 1 if teams construct is not present, or 0, if
  2352. teams construct does not have num_teams clause and so the choice is
  2353. implementation defined, and -1 if it can't be determined on the host
2354. what value GOMP_teams will have on the device.
  2355. THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
  2356. body with that value, or 0, if teams construct does not have thread_limit
  2357. clause or the teams construct is not present, or -1 if it can't be
2358. determined on the host what value GOMP_teams will have on the device. */
  2359. void
  2360. GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
  2361. void **hostaddrs, size_t *sizes, unsigned short *kinds,
  2362. unsigned int flags, void **depend, void **args)
  2363. {
  2364. struct gomp_device_descr *devicep = resolve_device (device);
  2365. size_t tgt_align = 0, tgt_size = 0;
  2366. bool fpc_done = false;
  2367. flags = clear_unsupported_flags (devicep, flags);
  2368. if (flags & GOMP_TARGET_FLAG_NOWAIT)
  2369. {
  2370. struct gomp_thread *thr = gomp_thread ();
  2371. /* Create a team if we don't have any around, as nowait
  2372. target tasks make sense to run asynchronously even when
  2373. outside of any parallel. */
  2374. if (__builtin_expect (thr->ts.team == NULL, 0))
  2375. {
  2376. struct gomp_team *team = gomp_new_team (1);
  2377. struct gomp_task *task = thr->task;
  2378. struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
  2379. team->prev_ts = thr->ts;
  2380. thr->ts.team = team;
  2381. thr->ts.team_id = 0;
  2382. thr->ts.work_share = &team->work_shares[0];
  2383. thr->ts.last_work_share = NULL;
  2384. #ifdef HAVE_SYNC_BUILTINS
  2385. thr->ts.single_count = 0;
  2386. #endif
  2387. thr->ts.static_trip = 0;
  2388. thr->task = &team->implicit_task[0];
  2389. gomp_init_task (thr->task, NULL, icv);
  2390. if (task)
  2391. {
  2392. thr->task = task;
  2393. gomp_end_task ();
  2394. free (task);
  2395. thr->task = &team->implicit_task[0];
  2396. }
  2397. else
  2398. pthread_setspecific (gomp_thread_destructor, thr);
  2399. }
  2400. if (thr->ts.team
  2401. && !thr->task->final_task)
  2402. {
  2403. gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
  2404. sizes, kinds, flags, depend, args,
  2405. GOMP_TARGET_TASK_BEFORE_MAP);
  2406. return;
  2407. }
  2408. }
  2409. /* If there are depend clauses, but nowait is not present
  2410. (or we are in a final task), block the parent task until the
  2411. dependencies are resolved and then just continue with the rest
  2412. of the function as if it is a merged task. */
  2413. if (depend != NULL)
  2414. {
  2415. struct gomp_thread *thr = gomp_thread ();
  2416. if (thr->task && thr->task->depend_hash)
  2417. {
  2418. /* If we might need to wait, copy firstprivate now. */
  2419. calculate_firstprivate_requirements (mapnum, sizes, kinds,
  2420. &tgt_align, &tgt_size);
  2421. if (tgt_align)
  2422. {
  2423. char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
  2424. copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
  2425. tgt_align, tgt_size);
  2426. }
  2427. fpc_done = true;
  2428. gomp_task_maybe_wait_for_dependencies (depend);
  2429. }
  2430. }
  2431. void *fn_addr;
  2432. if (devicep == NULL
  2433. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
  2434. || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
  2435. || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
  2436. {
  2437. if (!fpc_done)
  2438. {
  2439. calculate_firstprivate_requirements (mapnum, sizes, kinds,
  2440. &tgt_align, &tgt_size);
  2441. if (tgt_align)
  2442. {
  2443. char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
  2444. copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
  2445. tgt_align, tgt_size);
  2446. }
  2447. }
  2448. gomp_target_fallback (fn, hostaddrs, devicep, args);
  2449. return;
  2450. }
  2451. struct target_mem_desc *tgt_vars;
  2452. htab_t refcount_set = NULL;
  2453. if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  2454. {
  2455. if (!fpc_done)
  2456. {
  2457. calculate_firstprivate_requirements (mapnum, sizes, kinds,
  2458. &tgt_align, &tgt_size);
  2459. if (tgt_align)
  2460. {
  2461. char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
  2462. copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
  2463. tgt_align, tgt_size);
  2464. }
  2465. }
  2466. tgt_vars = NULL;
  2467. }
  2468. else
  2469. {
  2470. refcount_set = htab_create (mapnum);
  2471. tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
  2472. true, &refcount_set, GOMP_MAP_VARS_TARGET);
  2473. }
  2474. devicep->run_func (devicep->target_id, fn_addr,
  2475. tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
  2476. args);
  2477. if (tgt_vars)
  2478. {
  2479. htab_clear (refcount_set);
  2480. gomp_unmap_vars (tgt_vars, true, &refcount_set);
  2481. }
  2482. if (refcount_set)
  2483. htab_free (refcount_set);
  2484. }
  2485. /* Host fallback for GOMP_target_data{,_ext} routines. */
  2486. static void
  2487. gomp_target_data_fallback (struct gomp_device_descr *devicep)
  2488. {
  2489. struct gomp_task_icv *icv = gomp_icv (false);
  2490. if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
  2491. && devicep != NULL)
  2492. gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
  2493. "be used for offloading");
  2494. if (icv->target_data)
  2495. {
  2496. /* Even when doing a host fallback, if there are any active
2497. #pragma omp target data constructs, we need to remember the
  2498. new #pragma omp target data, otherwise GOMP_target_end_data
  2499. would get out of sync. */
  2500. struct target_mem_desc *tgt
  2501. = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
  2502. NULL, GOMP_MAP_VARS_DATA);
  2503. tgt->prev = icv->target_data;
  2504. icv->target_data = tgt;
  2505. }
  2506. }
  2507. void
  2508. GOMP_target_data (int device, const void *unused, size_t mapnum,
  2509. void **hostaddrs, size_t *sizes, unsigned char *kinds)
  2510. {
  2511. struct gomp_device_descr *devicep = resolve_device (device);
  2512. if (devicep == NULL
  2513. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
  2514. || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
  2515. return gomp_target_data_fallback (devicep);
  2516. struct target_mem_desc *tgt
  2517. = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
  2518. NULL, GOMP_MAP_VARS_DATA);
  2519. struct gomp_task_icv *icv = gomp_icv (true);
  2520. tgt->prev = icv->target_data;
  2521. icv->target_data = tgt;
  2522. }
  2523. void
  2524. GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
  2525. size_t *sizes, unsigned short *kinds)
  2526. {
  2527. struct gomp_device_descr *devicep = resolve_device (device);
  2528. if (devicep == NULL
  2529. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
  2530. || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  2531. return gomp_target_data_fallback (devicep);
  2532. struct target_mem_desc *tgt
  2533. = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
  2534. NULL, GOMP_MAP_VARS_DATA);
  2535. struct gomp_task_icv *icv = gomp_icv (true);
  2536. tgt->prev = icv->target_data;
  2537. icv->target_data = tgt;
  2538. }
  2539. void
  2540. GOMP_target_end_data (void)
  2541. {
  2542. struct gomp_task_icv *icv = gomp_icv (false);
  2543. if (icv->target_data)
  2544. {
  2545. struct target_mem_desc *tgt = icv->target_data;
  2546. icv->target_data = tgt->prev;
  2547. gomp_unmap_vars (tgt, true, NULL);
  2548. }
  2549. }
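/* Illustrative sketch, not part of the original source: GOMP_target_data{,_ext}
   and GOMP_target_end_data bracket an OpenMP 'target data' region, and nested
   regions form a stack through tgt->prev in the task ICV.  Each closing brace
   below is compiled into a GOMP_target_end_data call, which unmaps the most
   recent descriptor and restores the previous one.

     int a[256], b[256];
     #pragma omp target data map(to: a)
     {
       #pragma omp target data map(tofrom: b)
       {
         ...
       }    // pops b's descriptor: icv->target_data = tgt->prev
     }      // pops a's descriptor  */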
  2550. void
  2551. GOMP_target_update (int device, const void *unused, size_t mapnum,
  2552. void **hostaddrs, size_t *sizes, unsigned char *kinds)
  2553. {
  2554. struct gomp_device_descr *devicep = resolve_device (device);
  2555. if (devicep == NULL
  2556. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
  2557. || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  2558. return;
  2559. gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
  2560. }
  2561. void
  2562. GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
  2563. size_t *sizes, unsigned short *kinds,
  2564. unsigned int flags, void **depend)
  2565. {
  2566. struct gomp_device_descr *devicep = resolve_device (device);
  2567. /* If there are depend clauses, but nowait is not present,
  2568. block the parent task until the dependencies are resolved
  2569. and then just continue with the rest of the function as if it
2570. is a merged task. Until we are able to schedule tasks during
  2571. variable mapping or unmapping, ignore nowait if depend clauses
  2572. are not present. */
  2573. if (depend != NULL)
  2574. {
  2575. struct gomp_thread *thr = gomp_thread ();
  2576. if (thr->task && thr->task->depend_hash)
  2577. {
  2578. if ((flags & GOMP_TARGET_FLAG_NOWAIT)
  2579. && thr->ts.team
  2580. && !thr->task->final_task)
  2581. {
  2582. if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
  2583. mapnum, hostaddrs, sizes, kinds,
  2584. flags | GOMP_TARGET_FLAG_UPDATE,
  2585. depend, NULL, GOMP_TARGET_TASK_DATA))
  2586. return;
  2587. }
  2588. else
  2589. {
  2590. struct gomp_team *team = thr->ts.team;
  2591. /* If parallel or taskgroup has been cancelled, don't start new
  2592. tasks. */
  2593. if (__builtin_expect (gomp_cancel_var, 0) && team)
  2594. {
  2595. if (gomp_team_barrier_cancelled (&team->barrier))
  2596. return;
  2597. if (thr->task->taskgroup)
  2598. {
  2599. if (thr->task->taskgroup->cancelled)
  2600. return;
  2601. if (thr->task->taskgroup->workshare
  2602. && thr->task->taskgroup->prev
  2603. && thr->task->taskgroup->prev->cancelled)
  2604. return;
  2605. }
  2606. }
  2607. gomp_task_maybe_wait_for_dependencies (depend);
  2608. }
  2609. }
  2610. }
  2611. if (devicep == NULL
  2612. || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
  2613. || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
  2614. return;
  2615. struct gomp_thread *thr = gomp_thread ();
  2616. struct gomp_team *team = thr->ts.team;
  2617. /* If parallel or taskgroup has been cancelled, don't start new tasks. */
  2618. if (__builtin_expect (gomp_cancel_var, 0) && team)
  2619. {
  2620. if (gomp_team_barrier_cancelled (&team->barrier))
  2621. return;
  2622. if (thr->task->taskgroup)
  2623. {
  2624. if (thr->task->taskgroup->cancelled)
  2625. return;
  2626. if (thr->task->taskgroup->workshare
  2627. && thr->task->taskgroup->prev
  2628. && thr->task->taskgroup->prev->cancelled)
  2629. return;
  2630. }
  2631. }
  2632. gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
  2633. }
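/* Illustrative usage sketch (not compiled into libgomp; example_* is
   hypothetical): a 'target update' directive refreshing device and host
   copies of already-mapped data.  The compiler lowers such a directive,
   roughly, to a GOMP_target_update_ext call; with 'nowait' plus 'depend'
   the update may instead be deferred through gomp_create_target_task.  */
#if 0
#include <omp.h>

void
example_target_update (double *a, int n)
{
  #pragma omp target data map(tofrom: a[0:n])
  {
    a[0] = 1.0;                             /* Modify on the host...  */
    #pragma omp target update to(a[0:n])    /* ...and push to the device.  */

    #pragma omp target
    a[0] += 1.0;                            /* Modify on the device...  */
    #pragma omp target update from(a[0:n])  /* ...and pull back to the host.  */
  }
}
#endif
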
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds,
                htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
        struct splay_tree_key_s cur_node;
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizeof (void *);
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

        if (n)
          gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
                               false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          bool delete_p = (kind == GOMP_MAP_DELETE
                           || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
          bool do_copy, do_remove;
          gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
                                   &do_remove);

          if ((kind == GOMP_MAP_FROM && do_copy)
              || kind == GOMP_MAP_ALWAYS_FROM)
            {
              if (k->aux && k->aux->attach_count)
                {
                  /* We have to be careful not to overwrite still attached
                     pointers during the copyback to host.  */
                  uintptr_t addr = k->host_start;
                  while (addr < k->host_end)
                    {
                      size_t i = (addr - k->host_start) / sizeof (void *);
                      if (k->aux->attach_count[i] == 0)
                        gomp_copy_dev2host (devicep, NULL, (void *) addr,
                                            (void *) (k->tgt->tgt_start
                                                      + k->tgt_offset
                                                      + addr - k->host_start),
                                            sizeof (void *));
                      addr += sizeof (void *);
                    }
                }
              else
                gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
                                    (void *) (k->tgt->tgt_start + k->tgt_offset
                                              + cur_node.host_start
                                              - k->host_start),
                                    cur_node.host_end - cur_node.host_start);
            }

          /* Structure elements lists are removed altogether at once, which
             may cause immediate deallocation of the target_mem_desc, causing
             errors if we still have following element siblings to copy back.
             While we're at it, it also seems more disciplined to simply
             queue all removals together for processing below.

             Structured block unmapping (i.e. gomp_unmap_vars_internal) should
             not have this problem, since they maintain an additional
             tgt->refcount = 1 reference to the target_mem_desc to start
             with.  */
          if (do_remove)
            remove_vars[nrmvars++] = k;
          break;

        case GOMP_MAP_DETACH:
          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
                             size_t *sizes, unsigned short *kinds,
                             unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (__builtin_expect (gomp_cancel_var, 0) && team)
                {
                  if (gomp_team_barrier_cancelled (&team->barrier))
                    return;
                  if (thr->task->taskgroup)
                    {
                      if (thr->task->taskgroup->cancelled)
                        return;
                      if (thr->task->taskgroup->workshare
                          && thr->task->taskgroup->prev
                          && thr->task->taskgroup->prev->cancelled)
                        return;
                    }
                }

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
        {
          gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
                         &kinds[i], true, &refcount_set,
                         GOMP_MAP_VARS_ENTER_DATA);
          i += sizes[i];
        }
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
        {
          for (j = i + 1; j < mapnum; j++)
            if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
                && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
              break;
          gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
                         &kinds[i], true, &refcount_set,
                         GOMP_MAP_VARS_ENTER_DATA);
          i += j - i - 1;
        }
      else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
        {
          /* An attach operation must be processed together with the mapped
             base-pointer list item.  */
          gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
                         true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
          i += 1;
        }
      else
        gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
                       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}

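/* Illustrative usage sketch (not compiled into libgomp; example_* is
   hypothetical): unstructured mapping with 'target enter data' /
   'target exit data'.  Both directives are lowered, roughly, to
   GOMP_target_enter_exit_data; the GOMP_TARGET_FLAG_EXIT_DATA flag selects
   the unmapping path (gomp_exit_data) over the mapping one.  */
#if 0
#include <omp.h>
#include <stdlib.h>

double *
example_enter_exit_data (int n)
{
  double *a = malloc (n * sizeof (double));
  #pragma omp target enter data map(alloc: a[0:n])   /* Map, no copy-in.  */

  #pragma omp target
  for (int i = 0; i < n; i++)
    a[i] = i;

  #pragma omp target exit data map(from: a[0:n])     /* Copy back, unmap.  */
  return a;
}
#endif
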
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
          || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
        {
          ttask->state = GOMP_TARGET_TASK_FALLBACK;
          gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
                                ttask->args);
          return false;
        }

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
        {
          if (ttask->tgt)
            gomp_unmap_vars (ttask->tgt, true, NULL);
          return false;
        }

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        {
          ttask->tgt = NULL;
          actual_arguments = ttask->hostaddrs;
        }
      else
        {
          ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
                                      NULL, ttask->sizes, ttask->kinds, true,
                                      NULL, GOMP_MAP_VARS_TARGET);
          actual_arguments = (void *) ttask->tgt->tgt_start;
        }
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
                               ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
           || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
           || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
        for (i = 0; i < ttask->mapnum; i++)
          if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
            {
              gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
                             NULL, &ttask->sizes[i], &ttask->kinds[i], true,
                             &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
              i += ttask->sizes[i];
            }
          else
            gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
                           &ttask->sizes[i], &ttask->kinds[i], true,
                           &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
      else
        gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
                        ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

bool
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
             unsigned int thread_limit, bool first)
{
  struct gomp_thread *thr = gomp_thread ();
  if (first)
    {
      if (thread_limit)
        {
          struct gomp_task_icv *icv = gomp_icv (true);
          icv->thread_limit_var
            = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
        }
      (void) num_teams_high;
      if (num_teams_low == 0)
        num_teams_low = 1;
      thr->num_teams = num_teams_low - 1;
      thr->team_num = 0;
    }
  else if (thr->team_num == thr->num_teams)
    return false;
  else
    ++thr->team_num;
  return true;
}

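/* Illustrative usage sketch (not compiled into libgomp; example_* is
   hypothetical): a host 'teams' region.  This is a sketch assuming the host
   fallback expansion drives GOMP_teams4 in a loop: one call with FIRST set
   records the requested team count, and later calls advance team_num until
   the function returns false, so the single host thread plays each team in
   turn.  The pragma-level equivalent is simply:  */
#if 0
#include <omp.h>
#include <stdio.h>

void
example_teams (void)
{
  #pragma omp teams num_teams(4) thread_limit(8)
  printf ("team %d of %d\n", omp_get_team_num (), omp_get_num_teams ());
}
#endif
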
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == gomp_get_num_devices ())
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

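/* Illustrative usage sketch (not compiled into libgomp; example_* is
   hypothetical): allocating raw device memory and querying presence.
   Memory from omp_target_alloc is not entered into the mapping table, so
   omp_target_is_present only reports pointers that were mapped or
   associated beforehand (host and shared-memory devices always report 1).  */
#if 0
#include <omp.h>
#include <stdio.h>

void
example_alloc (void)
{
  int dev = omp_get_default_device ();
  double host_buf[256];

  void *dev_buf = omp_target_alloc (256 * sizeof (double), dev);
  if (dev_buf == NULL)
    return;                       /* No such device or allocation failure.  */

  printf ("host_buf present on device? %d\n",
          omp_target_is_present (host_buf, dev));   /* 0 unless mapped.  */

  omp_target_free (dev_buf, dev);
}
#endif
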
int
omp_target_memcpy (void *dst, const void *src, size_t length,
                   size_t dst_offset, size_t src_offset, int dst_device_num,
                   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
                                        (char *) dst + dst_offset,
                                        (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                       (char *) dst + dst_offset,
                                       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}

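/* Illustrative usage sketch (not compiled into libgomp; example_* is
   hypothetical): copying a host buffer into device memory obtained from
   omp_target_alloc.  Note the argument order: destination device number
   first, then source device number.  */
#if 0
#include <omp.h>

int
example_memcpy (const double *host_src, size_t n)
{
  int dev = omp_get_default_device ();
  int host = omp_get_initial_device ();

  void *dev_dst = omp_target_alloc (n * sizeof (double), dev);
  if (dev_dst == NULL)
    return -1;

  /* Host -> device copy: returns 0 on success, EINVAL on failure.  */
  int err = omp_target_memcpy (dev_dst, host_src, n * sizeof (double),
                               0 /* dst_offset */, 0 /* src_offset */,
                               dev, host);
  omp_target_free (dev_dst, dev);
  return err;
}
#endif
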
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
                               int num_dims, const size_t *volume,
                               const size_t *dst_offsets,
                               const size_t *src_offsets,
                               const size_t *dst_dimensions,
                               const size_t *src_dimensions,
                               struct gomp_device_descr *dst_devicep,
                               struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
          || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
          || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
        return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
        {
          memcpy ((char *) dst + dst_off, (const char *) src + src_off,
                  length);
          ret = 1;
        }
      else if (src_devicep == NULL)
        ret = dst_devicep->host2dev_func (dst_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (const char *) src + src_off,
                                          length);
      else if (dst_devicep == NULL)
        ret = src_devicep->dev2host_func (src_devicep->target_id,
                                          (char *) dst + dst_off,
                                          (const char *) src + src_off,
                                          length);
      else if (src_devicep == dst_devicep)
        ret = src_devicep->dev2dev_func (src_devicep->target_id,
                                         (char *) dst + dst_off,
                                         (const char *) src + src_off,
                                         length);
      else
        ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
        || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
                                           (const char *) src + src_off,
                                           element_size, num_dims - 1,
                                           volume + 1, dst_offsets + 1,
                                           src_offsets + 1, dst_dimensions + 1,
                                           src_dimensions + 1, dst_devicep,
                                           src_devicep);
      if (ret)
        return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
                        int num_dims, const size_t *volume,
                        const size_t *dst_offsets,
                        const size_t *src_offsets,
                        const size_t *dst_dimensions,
                        const size_t *src_dimensions,
                        int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
        return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
        return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        dst_devicep = NULL;
    }
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
        return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
        return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
          || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
        src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);

  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
                                           volume, dst_offsets, src_offsets,
                                           dst_dimensions, src_dimensions,
                                           dst_devicep, src_devicep);

  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);

  return ret;
}

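/* Illustrative usage sketch (not compiled into libgomp; example_* is
   hypothetical): a 2-D rectangular copy between two host arrays, i.e. both
   device numbers name the initial device.  A 2x3 block starting at row 1,
   column 1 of a 4x5 source is copied to the top-left corner of a 2x3
   destination; per dimension, the volume, offsets, and full dimensions are
   passed as separate arrays.  */
#if 0
#include <omp.h>

void
example_memcpy_rect (double dst[2][3], const double src[4][5])
{
  int host = omp_get_initial_device ();
  const size_t volume[2]   = { 2, 3 };   /* Rows, columns to copy.  */
  const size_t dst_offs[2] = { 0, 0 };
  const size_t src_offs[2] = { 1, 1 };
  const size_t dst_dims[2] = { 2, 3 };
  const size_t src_dims[2] = { 4, 5 };

  omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                          dst_offs, src_offs, dst_dims, src_dims,
                          host, host);
}
#endif
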
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
                          size_t size, size_t device_offset, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
          == (uintptr_t) device_ptr + device_offset
          && n->host_start <= cur_node.host_start
          && n->host_end >= cur_node.host_end)
        ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

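/* Illustrative usage sketch (not compiled into libgomp; example_* is
   hypothetical): pairing a host buffer with device memory the application
   manages itself.  While the association is in place, the host pointer
   counts as present, so target constructs reuse the supplied device storage
   instead of mapping anew; data movement stays explicit.  */
#if 0
#include <omp.h>

void
example_associate (double *host_buf, size_t n)
{
  int dev = omp_get_default_device ();
  void *dev_buf = omp_target_alloc (n * sizeof (double), dev);
  if (dev_buf == NULL)
    return;

  if (omp_target_associate_ptr (host_buf, dev_buf, n * sizeof (double),
                                0 /* device_offset */, dev) == 0)
    {
      /* Associated storage is not copied implicitly; move data explicitly.  */
      #pragma omp target update to(host_buf[0:n])

      #pragma omp target            /* host_buf is present: uses dev_buf.  */
      for (size_t i = 0; i < n; i++)
        host_buf[i] *= 2.0;

      #pragma omp target update from(host_buf[0:n])
      omp_target_disassociate_ptr (host_buf, dev);
    }
  omp_target_free (dev_buf, dev);
}
#endif
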
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == gomp_get_num_devices ())
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= num_devices_openmp)
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f) \
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n) \
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data)
          || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
          || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
          || !DLSYM_OPT (openacc.async.test, openacc_async_test)
          || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
          || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
          || !DLSYM_OPT (openacc.async.queue_callback,
                         openacc_async_queue_callback)
          || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
          || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
          || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
          || !DLSYM_OPT (openacc.get_property, openacc_get_property))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}

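/* Illustration (comment only): given the DLSYM macro above, a line such as
   'DLSYM (alloc);' expands to

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   i.e. every plugin is expected to export its hooks under the fixed
   "GOMP_OFFLOAD_" symbol prefix.  */
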
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
        ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
        gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;
        size_t prefix_len, suffix_len, cur_len;

        next = strchr (cur, ',');

        prefix_len = strlen (prefix);
        cur_len = next ? next - cur : strlen (cur);
        suffix_len = strlen (suffix);

        plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
        if (!plugin_name)
          {
            num_devs = 0;
            break;
          }

        memcpy (plugin_name, prefix, prefix_len);
        memcpy (plugin_name + prefix_len, cur, cur_len);
        memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            new_num_devs = current_device.get_num_devices_func ();
            if (new_num_devs >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */
                devs = realloc (devs, (num_devs + new_num_devs)
                                      * sizeof (struct gomp_device_descr));
                if (!devs)
                  {
                    num_devs = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.state = GOMP_DEVICE_UNINITIALIZED;
                for (i = 0; i < new_num_devs; i++)
                  {
                    current_device.target_id = i;
                    devs[num_devs] = current_device;
                    gomp_mutex_init (&devs[num_devs].lock);
                    num_devs++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devs[i]);
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

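/* Note (illustration, not from the build system): OFFLOAD_PLUGINS is a
   compile-time, comma-separated list of configured offload targets.  For an
   entry such as "nvptx" on a typical ELF/Linux configuration, the
   plugin_name assembled above would be "libgomp-plugin-nvptx" followed by
   SONAME_SUFFIX (1), i.e. something like "libgomp-plugin-nvptx.so.1".  */
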
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */