/* fpu-387.h */
  1. /* FPU-related code for x86 and x86_64 processors.
  2. Copyright (C) 2005-2022 Free Software Foundation, Inc.
  3. Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
  4. This file is part of the GNU Fortran 95 runtime library (libgfortran).
  5. Libgfortran is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU General Public
  7. License as published by the Free Software Foundation; either
  8. version 3 of the License, or (at your option) any later version.
  9. Libgfortran is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  20. #ifndef __SSE_MATH__
  21. #include "cpuid.h"
  22. #endif
  23. static int
  24. has_sse (void)
  25. {
  26. #ifndef __SSE_MATH__
  27. unsigned int eax, ebx, ecx, edx;
  28. if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
  29. return 0;
  30. return edx & bit_SSE;
  31. #else
  32. return 1;
  33. #endif
  34. }
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   Bit positions shared by the x87 control word (where a set bit MASKS,
   i.e. disables trapping of, the exception) and the x87 status word /
   MXCSR low bits (where a set bit reports a raised exception).  */
#define _FPU_MASK_IM 0x01	/* Invalid operation.  */
#define _FPU_MASK_DM 0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM 0x04	/* Zero divide.  */
#define _FPU_MASK_OM 0x08	/* Overflow.  */
#define _FPU_MASK_UM 0x10	/* Underflow.  */
#define _FPU_MASK_PM 0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f	/* All six mask bits.  */

#define _FPU_EX_ALL  0x3f	/* All six exception flag bits.  */

/* i387 rounding modes.  Values of the two-bit RC field of the x87
   control word (shifted into place by the code below).  */
#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3
#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode.  Bit 15 (FTZ) of the SSE MXCSR register.  */
#define MXCSR_FTZ (1 << 15)
/* This structure corresponds to the layout of the block
   written by FSTENV.  The field sizes and the gcc_struct attribute
   pin down the exact 28-byte x87 environment image; do not reorder
   or resize fields.  __mxcsr is NOT written by FSTENV -- it is an
   extra trailing slot this file uses to save the SSE control/status
   register next to the x87 state (see get_fpu_state).  */
struct fenv
{
  unsigned short int __control_word;	/* Exception masks and rounding control.  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* Exception flags, condition codes.  */
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned int __opcode:11;
  unsigned int __unused4:5;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* SSE MXCSR, saved separately.  */
} __attribute__ ((gcc_struct));

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
/* Force the division X / Y to actually be executed by the hardware so
   that it raises its IEEE exception, preventing the compiler from
   constant-folding or dead-code-eliminating it.  The "+x"/"x"
   constraints pin the operands to SSE registers, "+t"/"f" to the x87
   stack, matching whichever unit performs arithmetic here.  */
#ifdef __SSE_MATH__
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y)); \
  } while (0)
#else
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y)); \
  } while (0)
#endif
/* Raise the supported floating-point exceptions from EXCEPTS (given as
   _FPU_MASK_* bits).  Other bits in EXCEPTS are ignored.  Code
   originally borrowed from libatomic/config/x86/fenv.c.

   Invalid, zero-divide and inexact are raised by performing an actual
   division; denormal, overflow and underflow have no simple arithmetic
   trigger, so their flag is set directly in a saved x87 environment
   which is then reloaded, with FWAIT delivering the pending trap.  */
static void
local_feraiseexcept (int excepts)
{
  struct fenv temp;

  if (excepts & _FPU_MASK_IM)
    {
      /* 0.0f / 0.0f raises the invalid-operation exception.  */
      float f = 0.0f;
      __math_force_eval_div (f, f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* Set the denormal flag in the saved status word and reload.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* 1.0f / 0.0f raises the zero-divide exception.  */
      float f = 1.0f, g = 0.0f;
      __math_force_eval_div (f, g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Set the overflow flag in the saved status word and reload.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Set the underflow flag in the saved status word and reload.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* 1.0f / 3.0f is inexact, raising the precision exception.  */
      float f = 1.0f, g = 3.0f;
      __math_force_eval_div (f, g);
    }
}
  129. void
  130. set_fpu_trap_exceptions (int trap, int notrap)
  131. {
  132. int exc_set = 0, exc_clr = 0;
  133. unsigned short cw;
  134. if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
  135. if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
  136. if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
  137. if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
  138. if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
  139. if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
  140. if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
  141. if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
  142. if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
  143. if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
  144. if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
  145. if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
  146. __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  147. cw |= exc_clr;
  148. cw &= ~exc_set;
  149. __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
  150. if (has_sse())
  151. {
  152. unsigned int cw_sse;
  153. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
  154. /* The SSE exception masks are shifted by 7 bits. */
  155. cw_sse |= (exc_clr << 7);
  156. cw_sse &= ~(exc_set << 7);
  157. /* Clear stalled exception flags. */
  158. cw_sse &= ~_FPU_EX_ALL;
  159. __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
  160. }
  161. }
/* Initialize FPU trapping from the runtime options: enable traps for
   every exception flagged in options.fpe, disabling none.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
  167. int
  168. get_fpu_trap_exceptions (void)
  169. {
  170. unsigned short cw;
  171. int mask;
  172. int res = 0;
  173. __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  174. mask = cw;
  175. if (has_sse())
  176. {
  177. unsigned int cw_sse;
  178. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
  179. /* The SSE exception masks are shifted by 7 bits. */
  180. mask |= (cw_sse >> 7);
  181. }
  182. mask = ~mask & _FPU_MASK_ALL;
  183. if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  184. if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  185. if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  186. if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  187. if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  188. if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
  189. return res;
  190. }
  191. int
  192. support_fpu_trap (int flag __attribute__((unused)))
  193. {
  194. return 1;
  195. }
  196. int
  197. get_fpu_except_flags (void)
  198. {
  199. unsigned short cw;
  200. int excepts;
  201. int res = 0;
  202. __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  203. excepts = cw;
  204. if (has_sse())
  205. {
  206. unsigned int cw_sse;
  207. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
  208. excepts |= cw_sse;
  209. }
  210. excepts &= _FPU_EX_ALL;
  211. if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  212. if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  213. if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  214. if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  215. if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  216. if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
  217. return res;
  218. }
  219. void
  220. set_fpu_except_flags (int set, int clear)
  221. {
  222. struct fenv temp;
  223. int exc_set = 0, exc_clr = 0;
  224. /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
  225. if (set & GFC_FPE_INVALID)
  226. exc_set |= _FPU_MASK_IM;
  227. if (clear & GFC_FPE_INVALID)
  228. exc_clr |= _FPU_MASK_IM;
  229. if (set & GFC_FPE_DENORMAL)
  230. exc_set |= _FPU_MASK_DM;
  231. if (clear & GFC_FPE_DENORMAL)
  232. exc_clr |= _FPU_MASK_DM;
  233. if (set & GFC_FPE_ZERO)
  234. exc_set |= _FPU_MASK_ZM;
  235. if (clear & GFC_FPE_ZERO)
  236. exc_clr |= _FPU_MASK_ZM;
  237. if (set & GFC_FPE_OVERFLOW)
  238. exc_set |= _FPU_MASK_OM;
  239. if (clear & GFC_FPE_OVERFLOW)
  240. exc_clr |= _FPU_MASK_OM;
  241. if (set & GFC_FPE_UNDERFLOW)
  242. exc_set |= _FPU_MASK_UM;
  243. if (clear & GFC_FPE_UNDERFLOW)
  244. exc_clr |= _FPU_MASK_UM;
  245. if (set & GFC_FPE_INEXACT)
  246. exc_set |= _FPU_MASK_PM;
  247. if (clear & GFC_FPE_INEXACT)
  248. exc_clr |= _FPU_MASK_PM;
  249. /* Change the flags. This is tricky on 387 (unlike SSE), because we have
  250. FNSTSW but no FLDSW instruction. */
  251. __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  252. temp.__status_word &= ~exc_clr;
  253. __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
  254. /* Change the flags on SSE. */
  255. if (has_sse())
  256. {
  257. unsigned int cw_sse;
  258. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
  259. cw_sse &= ~exc_clr;
  260. __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
  261. }
  262. local_feraiseexcept (exc_set);
  263. }
  264. int
  265. support_fpu_flag (int flag __attribute__((unused)))
  266. {
  267. return 1;
  268. }
  269. void
  270. set_fpu_rounding_mode (int round)
  271. {
  272. int round_mode;
  273. unsigned short cw;
  274. switch (round)
  275. {
  276. case GFC_FPE_TONEAREST:
  277. round_mode = _FPU_RC_NEAREST;
  278. break;
  279. case GFC_FPE_UPWARD:
  280. round_mode = _FPU_RC_UP;
  281. break;
  282. case GFC_FPE_DOWNWARD:
  283. round_mode = _FPU_RC_DOWN;
  284. break;
  285. case GFC_FPE_TOWARDZERO:
  286. round_mode = _FPU_RC_ZERO;
  287. break;
  288. default:
  289. return; /* Should be unreachable. */
  290. }
  291. __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
  292. /* The x87 round control bits are shifted by 10 bits. */
  293. cw &= ~(_FPU_RC_MASK << 10);
  294. cw |= round_mode << 10;
  295. __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
  296. if (has_sse())
  297. {
  298. unsigned int cw_sse;
  299. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
  300. /* The SSE round control bits are shifted by 13 bits. */
  301. cw_sse &= ~(_FPU_RC_MASK << 13);
  302. cw_sse |= round_mode << 13;
  303. __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
  304. }
  305. }
  306. int
  307. get_fpu_rounding_mode (void)
  308. {
  309. int round_mode;
  310. #ifdef __SSE_MATH__
  311. unsigned int cw;
  312. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
  313. /* The SSE round control bits are shifted by 13 bits. */
  314. round_mode = cw >> 13;
  315. #else
  316. unsigned short cw;
  317. __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
  318. /* The x87 round control bits are shifted by 10 bits. */
  319. round_mode = cw >> 10;
  320. #endif
  321. round_mode &= _FPU_RC_MASK;
  322. switch (round_mode)
  323. {
  324. case _FPU_RC_NEAREST:
  325. return GFC_FPE_TONEAREST;
  326. case _FPU_RC_UP:
  327. return GFC_FPE_UPWARD;
  328. case _FPU_RC_DOWN:
  329. return GFC_FPE_DOWNWARD;
  330. case _FPU_RC_ZERO:
  331. return GFC_FPE_TOWARDZERO;
  332. default:
  333. return 0; /* Should be unreachable. */
  334. }
  335. }
  336. int
  337. support_fpu_rounding_mode (int mode __attribute__((unused)))
  338. {
  339. return 1;
  340. }
/* Save the current FPU state into STATE, which points to a buffer of
   at least GFC_FPE_STATE_BUFFER_SIZE bytes (checked by the
   _Static_assert above): the x87 environment via FNSTENV, plus the
   SSE MXCSR register when SSE is available.  */
void
get_fpu_state (void *state)
{
  struct fenv *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
/* Restore an FPU state previously saved by get_fpu_state: reload the
   x87 environment via FLDENV, and MXCSR when SSE is available.  */
void
set_fpu_state (void *state)
{
  struct fenv *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
  362. int
  363. support_fpu_underflow_control (int kind)
  364. {
  365. if (!has_sse())
  366. return 0;
  367. return (kind == 4 || kind == 8) ? 1 : 0;
  368. }
  369. int
  370. get_fpu_underflow_mode (void)
  371. {
  372. unsigned int cw_sse;
  373. if (!has_sse())
  374. return 1;
  375. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
  376. /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
  377. return (cw_sse & MXCSR_FTZ) ? 0 : 1;
  378. }
  379. void
  380. set_fpu_underflow_mode (int gradual)
  381. {
  382. unsigned int cw_sse;
  383. if (!has_sse())
  384. return;
  385. __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
  386. if (gradual)
  387. cw_sse &= ~MXCSR_FTZ;
  388. else
  389. cw_sse |= MXCSR_FTZ;
  390. __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
  391. }