libgomp-plugin-intelmic.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. /* Plugin for offload execution on Intel MIC devices.
  2. Copyright (C) 2014-2016 Free Software Foundation, Inc.
  3. Contributed by Ilya Verbin <ilya.verbin@intel.com>.
  4. This file is part of the GNU Offloading and Multi Processing Library
  5. (libgomp).
  6. Libgomp is free software; you can redistribute it and/or modify it
  7. under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 3, or (at your option)
  9. any later version.
  10. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. more details.
  14. Under Section 7 of GPL version 3, you are granted additional
  15. permissions described in the GCC Runtime Library Exception, version
  16. 3.1, as published by the Free Software Foundation.
  17. You should have received a copy of the GNU General Public License and
  18. a copy of the GCC Runtime Library Exception along with this program;
  19. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  20. <http://www.gnu.org/licenses/>. */
  21. /* Host side part of a libgomp plugin. */
  22. #include <stdint.h>
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <utility>
  27. #include <vector>
  28. #include <map>
  29. #include "libgomp-plugin.h"
  30. #include "compiler_if_host.h"
  31. #include "main_target_image.h"
  32. #include "gomp-constants.h"
  /* Name of the environment variable that controls active waiting.  */
  #define OFFLOAD_ACTIVE_WAIT_ENV "OFFLOAD_ACTIVE_WAIT"

  /* TRACE (fmt, ...) prints a debugging line to stderr when DEBUG is defined
     and expands to an empty statement otherwise.  Both variants are
     function-like and wrapped in do/while (0), so `TRACE (...);' is a single
     statement that is safe inside an unbraced if/else.  */
  #ifdef DEBUG
  #define TRACE(...) \
    do \
      { \
        fprintf (stderr, "HOST:\t%s:%s ", __FILE__, __FUNCTION__); \
        fprintf (stderr, __VA_ARGS__); \
        fprintf (stderr, "\n"); \
      } \
    while (0)
  #else
  #define TRACE(...) do { } while (0)
  #endif
  44. /* Start/end addresses of functions and global variables on a device. */
  45. typedef std::vector<addr_pair> AddrVect;
  46. /* Addresses for one image and all devices. */
  47. typedef std::vector<AddrVect> DevAddrVect;
  48. /* Addresses for all images and all devices. */
  49. typedef std::map<const void *, DevAddrVect> ImgDevAddrMap;
  /* Descriptor passed to __offload_[un]register_image: a size field, a
     generated library name and the image bytes themselves.  */
  struct TargetImageDesc
  {
    /* Number of bytes held in DATA.  */
    int64_t size;

    /* Room for "lib", ten decimal digits, ".so" and the terminating NUL;
       ten digits are enough for the largest int value.  */
    char name[sizeof ("lib0000000000.so")];

    /* Image contents; storage is allocated together with the descriptor.  */
    char data[];
  };
  57. /* Image descriptors, indexed by a pointer obtained from libgomp. */
  58. typedef std::map<const void *, TargetImageDesc *> ImgDescMap;
  59. /* Total number of available devices. */
  60. static int num_devices;
  61. /* Total number of shared libraries with offloading to Intel MIC. */
  62. static int num_images;
  63. /* Two dimensional array: one key is a pointer to image,
  64. second key is number of device. Contains a vector of pointer pairs. */
  65. static ImgDevAddrMap *address_table;
  66. /* Descriptors of all images, registered in liboffloadmic. */
  67. static ImgDescMap *image_descriptors;
  68. /* Thread-safe registration of the main image. */
  69. static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
  70. static VarDesc vd_host2tgt = {
  71. { 1, 1 }, /* dst, src */
  72. { 1, 0 }, /* in, out */
  73. 1, /* alloc_if */
  74. 1, /* free_if */
  75. 4, /* align */
  76. 0, /* mic_offset */
  77. { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
  78. is_stack_buf, sink_addr, alloc_disp,
  79. is_noncont_src, is_noncont_dst */
  80. 0, /* offset */
  81. 0, /* size */
  82. 1, /* count */
  83. 0, /* alloc */
  84. 0, /* into */
  85. 0 /* ptr */
  86. };
  87. static VarDesc vd_tgt2host = {
  88. { 1, 1 }, /* dst, src */
  89. { 0, 1 }, /* in, out */
  90. 1, /* alloc_if */
  91. 1, /* free_if */
  92. 4, /* align */
  93. 0, /* mic_offset */
  94. { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
  95. is_stack_buf, sink_addr, alloc_disp,
  96. is_noncont_src, is_noncont_dst */
  97. 0, /* offset */
  98. 0, /* size */
  99. 1, /* count */
  100. 0, /* alloc */
  101. 0, /* into */
  102. 0 /* ptr */
  103. };
  104. __attribute__((constructor))
  105. static void
  106. init (void)
  107. {
  108. const char *active_wait = getenv (OFFLOAD_ACTIVE_WAIT_ENV);
  109. /* Disable active wait by default to avoid useless CPU usage. */
  110. if (!active_wait)
  111. setenv (OFFLOAD_ACTIVE_WAIT_ENV, "0", 0);
  112. address_table = new ImgDevAddrMap;
  113. image_descriptors = new ImgDescMap;
  114. num_devices = _Offload_number_of_devices ();
  115. }
  116. extern "C" const char *
  117. GOMP_OFFLOAD_get_name (void)
  118. {
  119. const char *res = "intelmic";
  120. TRACE ("(): return %s", res);
  121. return res;
  122. }
  123. extern "C" unsigned int
  124. GOMP_OFFLOAD_get_caps (void)
  125. {
  126. unsigned int res = GOMP_OFFLOAD_CAP_OPENMP_400;
  127. TRACE ("(): return %x", res);
  128. return res;
  129. }
  130. extern "C" int
  131. GOMP_OFFLOAD_get_type (void)
  132. {
  133. enum offload_target_type res = OFFLOAD_TARGET_TYPE_INTEL_MIC;
  134. TRACE ("(): return %d", res);
  135. return res;
  136. }
  137. extern "C" int
  138. GOMP_OFFLOAD_get_num_devices (void)
  139. {
  140. TRACE ("(): return %d", num_devices);
  141. return num_devices;
  142. }
  143. static bool
  144. offload (const char *file, uint64_t line, int device, const char *name,
  145. int num_vars, VarDesc *vars, const void **async_data)
  146. {
  147. OFFLOAD ofld = __offload_target_acquire1 (&device, file, line);
  148. if (ofld)
  149. {
  150. if (async_data == NULL)
  151. return __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0,
  152. NULL, NULL);
  153. else
  154. {
  155. OffloadFlags flags;
  156. flags.flags = 0;
  157. flags.bits.omp_async = 1;
  158. return __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0,
  159. NULL, async_data, 0, NULL, flags, NULL);
  160. }
  161. }
  162. else
  163. {
  164. GOMP_PLUGIN_error ("%s:%d: Offload target acquire failed\n", file, line);
  165. return false;
  166. }
  167. }
  168. static void
  169. register_main_image ()
  170. {
  171. /* Do not check the return value, because old versions of liboffloadmic did
  172. not have return values. */
  173. __offload_register_image (&main_target_image);
  174. /* liboffloadmic will call GOMP_PLUGIN_target_task_completion when
  175. asynchronous task on target is completed. */
  176. __offload_register_task_callback (GOMP_PLUGIN_target_task_completion);
  177. }
  178. /* liboffloadmic loads and runs offload_target_main on all available devices
  179. during a first call to offload (). */
  180. extern "C" bool
  181. GOMP_OFFLOAD_init_device (int device)
  182. {
  183. TRACE ("(device = %d)", device);
  184. pthread_once (&main_image_is_registered, register_main_image);
  185. return offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
  186. NULL, NULL);
  187. }
  188. extern "C" bool
  189. GOMP_OFFLOAD_fini_device (int device)
  190. {
  191. TRACE ("(device = %d)", device);
  192. /* liboffloadmic will finalize target processes on all available devices. */
  193. __offload_unregister_image (&main_target_image);
  194. return true;
  195. }
  196. static bool
  197. get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
  198. {
  199. VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host };
  200. vd1[0].ptr = &num_funcs;
  201. vd1[0].size = sizeof (num_funcs);
  202. vd1[1].ptr = &num_vars;
  203. vd1[1].size = sizeof (num_vars);
  204. if (!offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
  205. vd1, NULL))
  206. return false;
  207. int table_size = num_funcs + 2 * num_vars;
  208. if (table_size > 0)
  209. {
  210. table = new void * [table_size];
  211. VarDesc vd2;
  212. vd2 = vd_tgt2host;
  213. vd2.ptr = table;
  214. vd2.size = table_size * sizeof (void *);
  215. return offload (__FILE__, __LINE__, device, "__offload_target_table_p2",
  216. 1, &vd2, NULL);
  217. }
  218. return true;
  219. }
  220. /* Offload TARGET_IMAGE to all available devices and fill address_table with
  221. corresponding target addresses. */
  222. static bool
  223. offload_image (const void *target_image)
  224. {
  225. void *image_start = ((void **) target_image)[0];
  226. void *image_end = ((void **) target_image)[1];
  227. TRACE ("(target_image = %p { %p, %p })",
  228. target_image, image_start, image_end);
  229. int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
  230. TargetImageDesc *image = (TargetImageDesc *) malloc (offsetof (TargetImageDesc, data)
  231. + image_size);
  232. if (!image)
  233. {
  234. GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
  235. return false;
  236. }
  237. image->size = image_size;
  238. sprintf (image->name, "lib%010d.so", num_images++);
  239. memcpy (image->data, image_start, image->size);
  240. TRACE ("() __offload_register_image %s { %p, %d }",
  241. image->name, image_start, image->size);
  242. /* Do not check the return value, because old versions of liboffloadmic did
  243. not have return values. */
  244. __offload_register_image (image);
  245. /* Receive tables for target_image from all devices. */
  246. DevAddrVect dev_table;
  247. bool ret = true;
  248. for (int dev = 0; dev < num_devices; dev++)
  249. {
  250. int num_funcs = 0;
  251. int num_vars = 0;
  252. void **table = NULL;
  253. ret &= get_target_table (dev, num_funcs, num_vars, table);
  254. AddrVect curr_dev_table;
  255. for (int i = 0; i < num_funcs; i++)
  256. {
  257. addr_pair tgt_addr;
  258. tgt_addr.start = (uintptr_t) table[i];
  259. tgt_addr.end = tgt_addr.start + 1;
  260. TRACE ("() func %d:\t0x%llx..0x%llx", i,
  261. tgt_addr.start, tgt_addr.end);
  262. curr_dev_table.push_back (tgt_addr);
  263. }
  264. for (int i = 0; i < num_vars; i++)
  265. {
  266. addr_pair tgt_addr;
  267. tgt_addr.start = (uintptr_t) table[num_funcs+i*2];
  268. tgt_addr.end = tgt_addr.start + (uintptr_t) table[num_funcs+i*2+1];
  269. TRACE ("() var %d:\t0x%llx..0x%llx", i, tgt_addr.start, tgt_addr.end);
  270. curr_dev_table.push_back (tgt_addr);
  271. }
  272. dev_table.push_back (curr_dev_table);
  273. delete [] table;
  274. }
  275. address_table->insert (std::make_pair (target_image, dev_table));
  276. image_descriptors->insert (std::make_pair (target_image, image));
  277. return ret;
  278. }
  279. /* Return the libgomp version number we're compatible with. There is
  280. no requirement for cross-version compatibility. */
  281. extern "C" unsigned
  282. GOMP_OFFLOAD_version (void)
  283. {
  284. return GOMP_VERSION;
  285. }
  286. extern "C" int
  287. GOMP_OFFLOAD_load_image (int device, const unsigned version,
  288. const void *target_image, addr_pair **result)
  289. {
  290. TRACE ("(device = %d, target_image = %p)", device, target_image);
  291. if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
  292. {
  293. GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
  294. " (expected %u, received %u)",
  295. GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
  296. return -1;
  297. }
  298. /* If target_image is already present in address_table, then there is no need
  299. to offload it. */
  300. if (address_table->count (target_image) == 0)
  301. {
  302. /* If fail, return -1 as error code. */
  303. if (!offload_image (target_image))
  304. return -1;
  305. }
  306. AddrVect *curr_dev_table = &(*address_table)[target_image][device];
  307. int table_size = curr_dev_table->size ();
  308. addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
  309. if (table == NULL)
  310. {
  311. GOMP_PLUGIN_error ("%s: Can't allocate memory\n", __FILE__);
  312. return -1;
  313. }
  314. std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
  315. *result = table;
  316. return table_size;
  317. }
  318. extern "C" bool
  319. GOMP_OFFLOAD_unload_image (int device, unsigned version,
  320. const void *target_image)
  321. {
  322. if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
  323. {
  324. GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
  325. " (expected %u, received %u)",
  326. GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
  327. return false;
  328. }
  329. TRACE ("(device = %d, target_image = %p)", device, target_image);
  330. /* liboffloadmic unloads the image from all available devices. */
  331. if (image_descriptors->count (target_image) > 0)
  332. {
  333. TargetImageDesc *image_desc = (*image_descriptors)[target_image];
  334. __offload_unregister_image (image_desc);
  335. free (image_desc);
  336. address_table->erase (target_image);
  337. image_descriptors->erase (target_image);
  338. }
  339. return true;
  340. }
  341. extern "C" void *
  342. GOMP_OFFLOAD_alloc (int device, size_t size)
  343. {
  344. TRACE ("(device = %d, size = %d)", device, size);
  345. void *tgt_ptr;
  346. VarDesc vd[2] = { vd_host2tgt, vd_tgt2host };
  347. vd[0].ptr = &size;
  348. vd[0].size = sizeof (size);
  349. vd[1].ptr = &tgt_ptr;
  350. vd[1].size = sizeof (void *);
  351. if (!offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2,
  352. vd, NULL))
  353. return NULL;
  354. return tgt_ptr;
  355. }
  356. extern "C" bool
  357. GOMP_OFFLOAD_free (int device, void *tgt_ptr)
  358. {
  359. TRACE ("(device = %d, tgt_ptr = %p)", device, tgt_ptr);
  360. VarDesc vd = vd_host2tgt;
  361. vd.ptr = &tgt_ptr;
  362. vd.size = sizeof (void *);
  363. return offload (__FILE__, __LINE__, device, "__offload_target_free", 1,
  364. &vd, NULL);
  365. }
  366. extern "C" bool
  367. GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
  368. size_t size)
  369. {
  370. TRACE ("(device = %d, tgt_ptr = %p, host_ptr = %p, size = %d)",
  371. device, tgt_ptr, host_ptr, size);
  372. if (!size)
  373. return true;
  374. VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
  375. vd1[0].ptr = &tgt_ptr;
  376. vd1[0].size = sizeof (void *);
  377. vd1[1].ptr = &size;
  378. vd1[1].size = sizeof (size);
  379. if (!offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
  380. vd1, NULL))
  381. return false;
  382. VarDesc vd2 = vd_host2tgt;
  383. vd2.ptr = (void *) host_ptr;
  384. vd2.size = size;
  385. return offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
  386. &vd2, NULL);
  387. }
  388. extern "C" bool
  389. GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
  390. size_t size)
  391. {
  392. TRACE ("(device = %d, host_ptr = %p, tgt_ptr = %p, size = %d)",
  393. device, host_ptr, tgt_ptr, size);
  394. if (!size)
  395. return true;
  396. VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
  397. vd1[0].ptr = &tgt_ptr;
  398. vd1[0].size = sizeof (void *);
  399. vd1[1].ptr = &size;
  400. vd1[1].size = sizeof (size);
  401. if (!offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
  402. vd1, NULL))
  403. return false;
  404. VarDesc vd2 = vd_tgt2host;
  405. vd2.ptr = (void *) host_ptr;
  406. vd2.size = size;
  407. return offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
  408. &vd2, NULL);
  409. }
  410. extern "C" bool
  411. GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
  412. size_t size)
  413. {
  414. TRACE ("(device = %d, dst_ptr = %p, src_ptr = %p, size = %d)",
  415. device, dst_ptr, src_ptr, size);
  416. if (!size)
  417. return true;
  418. VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
  419. vd[0].ptr = &dst_ptr;
  420. vd[0].size = sizeof (void *);
  421. vd[1].ptr = &src_ptr;
  422. vd[1].size = sizeof (void *);
  423. vd[2].ptr = &size;
  424. vd[2].size = sizeof (size);
  425. return offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3,
  426. vd, NULL);
  427. }
  428. extern "C" void
  429. GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
  430. void **, void *async_data)
  431. {
  432. TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
  433. tgt_fn, tgt_vars, async_data);
  434. VarDesc vd[2] = { vd_host2tgt, vd_host2tgt };
  435. vd[0].ptr = &tgt_fn;
  436. vd[0].size = sizeof (void *);
  437. vd[1].ptr = &tgt_vars;
  438. vd[1].size = sizeof (void *);
  439. offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd,
  440. (const void **) async_data);
  441. }
  442. extern "C" void
  443. GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
  444. {
  445. TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);
  446. GOMP_OFFLOAD_async_run (device, tgt_fn, tgt_vars, NULL, NULL);
  447. }