offload_common.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. /*
  2. Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions
  5. are met:
  6. * Redistributions of source code must retain the above copyright
  7. notice, this list of conditions and the following disclaimer.
  8. * Redistributions in binary form must reproduce the above copyright
  9. notice, this list of conditions and the following disclaimer in the
  10. documentation and/or other materials provided with the distribution.
  11. * Neither the name of Intel Corporation nor the names of its
  12. contributors may be used to endorse or promote products derived
  13. from this software without specific prior written permission.
  14. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /*! \file
  27. \brief The parts of the runtime library common to host and target
  28. */
  29. #ifndef OFFLOAD_COMMON_H_INCLUDED
  30. #define OFFLOAD_COMMON_H_INCLUDED
  31. #include <stdio.h>
  32. #include <stdlib.h>
  33. #include <string.h>
  34. #include <memory.h>
  35. #include "offload.h"
  36. #include "offload_table.h"
  37. #include "offload_trace.h"
  38. #include "offload_timer.h"
  39. #include "offload_util.h"
  40. #include "cean_util.h"
  41. #include "dv_util.h"
  42. #include "liboffload_error_codes.h"
  43. #include <stdarg.h>
  // Route getenv() through the secure variant when the build configuration
  // reports that one is available.
  // Both feature macros are tested with defined(): the first branch already
  // used #ifdef, and a value test on the second one fails to preprocess when
  // the macro is defined with an empty replacement list.
  #if defined(HAVE_SECURE_GETENV)
  #define getenv(x) secure_getenv(x)
  #elif defined(HAVE___SECURE_GETENV)
  #define getenv(x) __secure_getenv(x)
  #endif
  50. // Offload Library versioning
  51. DLL_LOCAL extern int offload_version;
  52. DLL_LOCAL extern int offload_version_count;
  53. // The debug routines
  54. // Host console and file logging
  55. DLL_LOCAL extern int console_enabled;
  56. DLL_LOCAL extern int offload_report_level;
  57. DLL_LOCAL extern const char *prefix;
  58. DLL_LOCAL extern int offload_number;
  59. #if !HOST_LIBRARY
  60. DLL_LOCAL extern int mic_index;
  61. #define OFFLOAD_DO_TRACE (offload_report_level == 3)
  62. #else
  63. #define OFFLOAD_DO_TRACE (offload_report_enabled && (offload_report_level == 3))
  64. #endif
  #if HOST_LIBRARY

  // Host-only reporting entry points.
  DLL_LOCAL void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
  DLL_LOCAL void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
  DLL_LOCAL void offload_report_free_data(OffloadHostTimerData * timer_data);
  DLL_LOCAL void Offload_Timer_Print(void);

  // Atomically add one to offload_number; evaluates to the incremented value.
  #ifdef TARGET_WINNT
  #define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
      _InterlockedIncrement(reinterpret_cast<long*>(&offload_number))
  #else
  #define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
      __sync_add_and_fetch(&offload_number, 1)
  #endif

  // Host trace prefix: "<prefix>: ".
  // Note: both macros expand to a complete statement, semicolon included.
  #define OFFLOAD_DEBUG_PRINT_TAG_PREFIX() \
      printf("%s: ", prefix);
  #define OFFLOAD_DEBUG_PRINT_PREFIX() \
      printf("%s: ", prefix);

  #else

  // Target trace prefix: "<prefix><mic_index>: ".
  // Note: expands to a complete statement, semicolon included.
  #define OFFLOAD_DEBUG_PRINT_PREFIX() \
      printf("%s%d: ", prefix, mic_index);

  #endif // HOST_LIBRARY
  // Print a printf-style trace message, preceded by the trace prefix, when
  // console_enabled is at least trace_level; all output streams are flushed
  // afterwards.
  //
  // trace_level is parenthesized so that an argument containing a
  // lower-precedence operator (for example "cond ? 1 : 2") is compared as a
  // whole instead of being split around the ">=".
  //
  // The expansion is deliberately left as a plain "if { ... }" block: it is
  // also used without a trailing semicolon, so it must remain a complete
  // statement by itself.
  #define OFFLOAD_TRACE(trace_level, ...) \
      if (console_enabled >= (trace_level)) { \
          OFFLOAD_DEBUG_PRINT_PREFIX(); \
          printf(__VA_ARGS__); \
          fflush(NULL); \
      }
  // Debug tracing macros.
  //
  // When OFFLOAD_DEBUG is greater than zero the macros below emit output;
  // otherwise every one of them expands to nothing (arguments are then not
  // evaluated), so code using them builds in both configurations.
  #if OFFLOAD_DEBUG > 0

  // Trace message gated by console_enabled (see OFFLOAD_TRACE).
  #define OFFLOAD_DEBUG_TRACE(level, ...) \
      OFFLOAD_TRACE(level, __VA_ARGS__)

  // Stage report: printed through offload_stage_print() when
  // OFFLOAD_DO_TRACE holds. The level argument is accepted but not used.
  #define OFFLOAD_REPORT(level, offload_number, stage, ...) \
      if (OFFLOAD_DO_TRACE) { \
          offload_stage_print(stage, offload_number, __VA_ARGS__); \
          fflush(NULL); \
      }

  // Stage report when OFFLOAD_DO_TRACE holds, ordinary trace message
  // otherwise. Written as a single if/else so the whole expansion is one
  // statement and stays guarded when placed under an un-braced "if".
  #define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...) \
      if (OFFLOAD_DO_TRACE) { \
          offload_stage_print(stage, offload_number, __VA_ARGS__); \
          fflush(NULL); \
      } else { \
          OFFLOAD_TRACE(level, __VA_ARGS__) \
      }

  // Dump len bytes starting at data; forwards to __dump_bytes().
  #define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \
      __dump_bytes(level, a, b)

  DLL_LOCAL extern void __dump_bytes(
      int level,
      const void *data,
      int len
  );

  #else

  // Debugging disabled: all debug macros compile away.
  #define OFFLOAD_DEBUG_TRACE(level, ...)
  #define OFFLOAD_REPORT(level, offload_number, stage, ...)
  #define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...)
  #define OFFLOAD_DEBUG_LOG(level, ...)
  #define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b)

  #endif
  // Runtime interface.
  // OFFLOAD_PREFIX(a) pastes "__offload_" in front of a, so OFFLOAD_MALLOC
  // names the function __offload_malloc.
  #define OFFLOAD_PREFIX(a)   __offload_##a
  #define OFFLOAD_MALLOC      OFFLOAD_PREFIX(malloc)
  // Memory is released through _mm_free().
  #define OFFLOAD_FREE(a)     _mm_free(a)

  // Forward declaration of the allocator: takes a size and an alignment.
  extern void *OFFLOAD_MALLOC(size_t size, size_t align);
  124. // The Marshaller
  //! Flags describing an offload.
  /*! The same 32-bit word can be read as a whole through \c flags or
      field by field through \c bits. */
  union OffloadFlags {
      uint32_t flags;                     //!< All flags as one word
      struct {
          uint32_t fortran_traceback : 1; //!< Fortran traceback requested
          uint32_t omp_async         : 1; //!< OpenMP asynchronous offload
      } bits;
  };
  //! Kind of entry on an offload item list.
  /*! Values are consecutive starting at 1; they are spelled out here so the
      numeric value of each kind is visible at a glance. */
  enum OffloadItemType {
      c_data              = 1,  //!< Plain data
      c_data_ptr          = 2,  //!< Pointer data
      c_func_ptr          = 3,  //!< Function pointer
      c_void_ptr          = 4,  //!< void*
      c_string_ptr        = 5,  //!< C string
      c_dv                = 6,  //!< Dope vector variable
      c_dv_data           = 7,  //!< Dope-vector data
      c_dv_data_slice     = 8,  //!< Dope-vector data's slice
      c_dv_ptr            = 9,  //!< Dope-vector variable pointer
      c_dv_ptr_data       = 10, //!< Dope-vector pointer data
      c_dv_ptr_data_slice = 11, //!< Dope-vector pointer data's slice
      c_cean_var          = 12, //!< CEAN variable
      c_cean_var_ptr      = 13, //!< Pointer to CEAN variable
      c_data_ptr_array    = 14, //!< Pointer to data pointer array
      c_extended_type     = 15, //!< Is used to extend OffloadItemType;
                                //!< the actual OffloadItemType is in the
                                //!< structure VarDescExtendedType
      c_func_ptr_array    = 16, //!< Pointer to function pointer array
      c_void_ptr_array    = 17, //!< Pointer to void* pointer array
      c_string_ptr_array  = 18, //!< Pointer to char* pointer array
      c_data_ptr_ptr      = 19, //!< Pointer to pointer to data (struct member)
      c_func_ptr_ptr      = 20, //!< Pointer to pointer to function (struct member)
      c_void_ptr_ptr      = 21, //!< Pointer to pointer to void* (struct member)
      c_string_ptr_ptr    = 22, //!< Pointer to pointer to string (struct member)
      c_cean_var_ptr_ptr  = 23  //!< Pointer to pointer to cean var (struct member)
  };
  // Classification helpers for OffloadItemType values.
  // Each macro expands to a boolean expression; the argument may be
  // evaluated more than once, so pass an expression without side effects.

  // Pointer-to-pointer kinds (struct members).
  #define TYPE_IS_PTR_TO_PTR(t) \
      ((t) == c_string_ptr_ptr || \
       (t) == c_data_ptr_ptr || \
       (t) == c_func_ptr_ptr || \
       (t) == c_void_ptr_ptr || \
       (t) == c_cean_var_ptr_ptr)

  // Pointer kinds, including every pointer-to-pointer kind.
  #define VAR_TYPE_IS_PTR(t) \
      ((t) == c_string_ptr || \
       (t) == c_data_ptr || \
       (t) == c_cean_var_ptr || \
       (t) == c_dv_ptr || \
       TYPE_IS_PTR_TO_PTR(t))

  // Kinds treated as scalars.
  #define VAR_TYPE_IS_SCALAR(t) \
      ((t) == c_data || \
       (t) == c_void_ptr || \
       (t) == c_cean_var || \
       (t) == c_dv)

  // Dope-vector data kinds.
  #define VAR_TYPE_IS_DV_DATA(t) \
      ((t) == c_dv_data || \
       (t) == c_dv_ptr_data)

  // Dope-vector data slice kinds.
  #define VAR_TYPE_IS_DV_DATA_SLICE(t) \
      ((t) == c_dv_data_slice || \
       (t) == c_dv_ptr_data_slice)
  //! Direction in which an offloaded variable is copied.
  enum OffloadParameterType {
      c_parameter_unknown = -1, //!< Unknown clause
      c_parameter_nocopy  = 0,  //!< Variable listed in "nocopy" clause
      c_parameter_in      = 1,  //!< Variable listed in "in" clause
      c_parameter_out     = 2,  //!< Variable listed in "out" clause
      c_parameter_inout   = 3   //!< Variable listed in "inout" clause
  };
  //! Flags describing an offloaded variable.
  /*! The individual one-bit flags share storage with \c bits, which gives
      access to all of them as a single 32-bit word. */
  union varDescFlags {
      struct {
          uint32_t is_static          : 1; //!< source variable has persistent storage
          uint32_t is_static_dstn     : 1; //!< destination variable has persistent storage
          uint32_t has_length         : 1; //!< has length for c_dv && c_dv_ptr
          uint32_t is_stack_buf       : 1; //!< persisted local scalar is in stack buffer
          uint32_t targetptr          : 1; //!< "targetptr" modifier used
          uint32_t preallocated       : 1; //!< "preallocated" modifier used
          uint32_t is_pointer         : 1; //!< pointer to a pointer array
          uint32_t sink_addr          : 1; //!< buffer address is sent in data
          uint32_t alloc_disp         : 1; //!< alloc displacement is sent in data
          uint32_t is_noncont_src     : 1; //!< source data is noncontiguous
          uint32_t is_noncont_dst     : 1; //!< destination data is noncontiguous
          uint32_t always_copy        : 1; //!< "OpenMP always" modifier used
          uint32_t always_delete      : 1; //!< "OpenMP delete" modifier used
          uint32_t is_non_cont_struct : 1; //!< structured data is noncontiguous
          uint32_t pin                : 1; //!< CPU memory pinning/unpinning operation
          uint32_t is_device_ptr      : 1; //!< Pointer to device memory
          uint32_t use_device_ptr     : 1; //!< Hostpointer with associated device pointer
      };
      uint32_t bits;                       //!< All flags as one word
  };
  228. //! An Offload Variable descriptor
  229. struct VarDesc {
  230. //! OffloadItemTypes of source and destination
  231. union {
  232. struct {
  233. uint8_t dst : 4; //!< OffloadItemType of destination
  234. uint8_t src : 4; //!< OffloadItemType of source
  235. };
  236. uint8_t bits;
  237. } type;
  238. //! OffloadParameterType that describes direction of data transfer
  239. union {
  240. struct {
  241. uint8_t in : 1; //!< Set if IN or INOUT
  242. uint8_t out : 1; //!< Set if OUT or INOUT
  243. };
  244. uint8_t bits;
  245. } direction;
  246. uint8_t alloc_if; //!< alloc_if modifier value
  247. uint8_t free_if; //!< free_if modifier value
  248. uint32_t align; //!< MIC alignment requested for pointer data
  249. //! Not used by compiler; set to 0
  250. /*! Used by runtime as offset to data from start of MIC buffer */
  251. uint32_t mic_offset;
  252. //! Flags describing this variable
  253. varDescFlags flags;
  254. //! Not used by compiler; set to 0
  255. /*! Used by runtime as offset to base from data stored in a buffer */
  256. int64_t offset;
  257. //! Element byte-size of data to be transferred
  258. /*! For dope-vector, the size of the dope-vector */
  259. int64_t size;
  260. union {
  261. //! Set to 0 for array expressions and dope-vectors
  262. /*! Set to 1 for scalars */
  263. /*! Set to value of length modifier for pointers */
  264. int64_t count;
  265. //! Displacement not used by compiler
  266. int64_t disp;
  267. };
  268. //! This field not used by OpenMP 4.0
  269. /*! The alloc section expression in #pragma offload */
  270. union {
  271. void *alloc;
  272. int64_t ptr_arr_offset;
  273. };
  274. //! This field not used by OpenMP 4.0
  275. /*! The into section expression in #pragma offload */
  276. /*! For c_data_ptr_array this is the into ptr array */
  277. void *into;
  278. //! For an ordinary variable, address of the variable
  279. /*! For c_cean_var (C/C++ array expression),
  280. pointer to arr_desc, which is an array descriptor. */
  281. /*! For c_data_ptr_array (array of data pointers),
  282. pointer to ptr_array_descriptor,
  283. which is a descriptor for pointer array transfers. */
  284. void *ptr;
  285. };
  //! Companion record holding variable names; used when -g is enabled.
  struct VarDesc2 {
      const char *sname;  //!< Source name
      const char *dname;  //!< Destination name (when "into" is used)
  };
  /*! Pointer-array descriptor.
      When the OffloadItemType is c_data_ptr_array, the ptr field of the
      main descriptor points to this struct. */
  /*! The type in VarDesc1 merely says c_cean_data_ptr, but the pointer
      type can be c_data_ptr, c_func_ptr, c_void_ptr, or c_string_ptr.
      Therefore the actual pointer type is in the flags field of VarDesc3.
      NOTE(review): "VarDesc1" and "c_cean_data_ptr" are not declared in
      this header; presumably VarDesc and c_data_ptr_array are meant --
      confirm. */
  /*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
      is 0 then alignment/alloc_if/free_if are specified in VarDesc1. */
  /*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
      is 1 then align_array/alloc_if_array/free_if_array specify
      the set of alignment/alloc_if/free_if values. */
  /*! For the other fields, if neither the scalar nor the array flag
      is set, then that modifier was not specified. If the bits are set
      they specify which modifier was set and whether it was a
      scalar or an array expression. */
  struct VarDesc3
  {
      void *ptr_array;        //!< Pointer to arr_desc of array of pointers

      // Each of the following holds either a scalar value or a pointer to
      // an arr_desc; array_fields tells which.
      void *align_array;      //!< Scalar value or pointer to arr_desc
      void *alloc_if_array;   //!< Scalar value or pointer to arr_desc
      void *free_if_array;    //!< Scalar value or pointer to arr_desc
      void *extent_start;     //!< Scalar value or pointer to arr_desc
      void *extent_elements;  //!< Scalar value or pointer to arr_desc
      void *into_start;       //!< Scalar value or pointer to arr_desc
      void *into_elements;    //!< Scalar value or pointer to arr_desc
      void *alloc_start;      //!< Scalar value or pointer to arr_desc
      void *alloc_elements;   //!< Scalar value or pointer to arr_desc

      /*! Flags that describe the pointer type and whether each field
          is a scalar value or an array expression.

          First 6 bits are pointer array element type:
          c_data_ptr, c_func_ptr, c_void_ptr, c_string_ptr

          Then single bits specify, in this order:
            - align_array is an array
            - alloc_if_array is an array
            - free_if_array is an array
            - extent_start is a scalar expression
            - extent_start is an array expression
            - extent_elements is a scalar expression
            - extent_elements is an array expression
            - into_start is a scalar expression
            - into_start is an array expression
            - into_elements is a scalar expression
            - into_elements is an array expression
            - alloc_start is a scalar expression
            - alloc_start is an array expression
            - alloc_elements is a scalar expression
            - alloc_elements is an array expression */
      uint32_t array_fields;
  };
  // Numeric values of the per-field flags recorded in VarDesc3::array_fields.
  // They start at 6 and run consecutively, matching the order in which the
  // single-bit flags are listed for array_fields.

  // "is an array" flags for the align / alloc_if / free_if modifiers
  const int flag_align_is_array            = 6;
  const int flag_alloc_if_is_array         = 7;
  const int flag_free_if_is_array          = 8;

  // extent section: start and element count
  const int flag_extent_start_is_scalar    = 9;
  const int flag_extent_start_is_array     = 10;
  const int flag_extent_elements_is_scalar = 11;
  const int flag_extent_elements_is_array  = 12;

  // into section: start and element count
  const int flag_into_start_is_scalar      = 13;
  const int flag_into_start_is_array       = 14;
  const int flag_into_elements_is_scalar   = 15;
  const int flag_into_elements_is_array    = 16;

  // alloc section: start and element count
  const int flag_alloc_start_is_scalar     = 17;
  const int flag_alloc_start_is_array      = 18;
  const int flag_alloc_elements_is_scalar  = 19;
  const int flag_alloc_elements_is_array   = 20;
  //! Extended Variable Descriptor.
  //! VarDesc stores each OffloadItemType in a 4-bit field (type.src and
  //! type.dst), so it can only hold values up to 15. Any type greater
  //! than 15 has its type set in VarDesc as c_extended_type, and this
  //! structure is used to represent the actual type.
  typedef struct VarDescExtendedType {
      //! The OffloadItemType value that does not fit in VarDesc
      uint32_t extended_type;

      //! For extended_type: address of the variable.
      //! Future types can point to other descriptors.
      void *ptr;
  } VarDescExtendedType;
  // The Marshaller
  // Tracks a caller-supplied byte buffer (start, current position, size)
  // together with a running count of user data sent/received. The
  // send/receive operations are only declared here.
  class Marshaller
  {
  private:
      char *buffer_start;     // Start address of buffer
      char *buffer_ptr;       // Current pointer within buffer
      long long buffer_size;  // Physical size of data sent (including flags)
      long long tfr_size;     // User data sent/received

  public:
      // Start with no buffer attached and all counters at zero.
      Marshaller()
          : buffer_start(0),
            buffer_ptr(0),
            buffer_size(0),
            tfr_size(0)
      {
      }

      // Return count of user data sent/received
      long long get_tfr_size() const { return tfr_size; }

      // Return pointer to buffer
      char *get_buffer_start() const { return buffer_start; }

      // Return current size of data in buffer
      long long get_buffer_size() const { return buffer_size; }

      // Attach buffer d of size s; the current position is placed at its
      // start. tfr_size is left unchanged.
      void init_buffer(char *d, long long s)
      {
          buffer_start = d;
          buffer_ptr = d;
          buffer_size = s;
      }

      // Send data
      void send_data(const void *data, int64_t length);

      // Receive data
      void receive_data(void *data, int64_t length);

      // Send function pointer
      void send_func_ptr(const void* data);

      // Receive function pointer
      void receive_func_ptr(const void** data);
  };
  // End of the Marshaller
  // The offloaded function descriptor.
  // Sent from host to target to specify which function to run.
  // Also, sets console and file tracing levels.
  struct FunctionDescriptor
  {
      long long in_datalen;   // Input data size.
      long long out_datalen;  // Output data size.

      // Whether trace is requested on console.
      // A value of 1 produces only function name and data sent/received.
      // Values > 1 produce copious trace information.
      uint8_t console_enabled;

      // Flag controlling timing on the target side.
      // Values > 0 enable timing on sink.
      uint8_t timer_enabled;

      int offload_report_level;
      int offload_number;

      int vars_num;           // number of variable descriptors

      // inout data offset if data is passed as misc/return data
      // otherwise it should be zero.
      int data_offset;

      // The name of the offloaded function (trailing flexible array member)
      char data[];
  };
  // OFFLOAD: handle type, a pointer to struct OffloadDescriptor.
  // The struct itself is not defined in this header.
  typedef struct OffloadDescriptor *OFFLOAD;
  // Affinity choices used when setting the affinity of a stream.
  enum affinity_type {
      affinity_compact = 0,
      affinity_scatter = 1
  };
  // Affinity request: a 16-word sink mask plus the affinity kind and the
  // core and thread counts.
  struct affinity_spec {
      uint64_t sink_mask[16];
      int      affinity_type;  // presumably an enum affinity_type value -- confirm
      int      num_cores;
      int      num_threads;
  };
  469. #endif // OFFLOAD_COMMON_H_INCLUDED