lse.S 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. /* Out-of-line LSE atomics for AArch64 architecture.
  2. Copyright (C) 2019-2022 Free Software Foundation, Inc.
  3. Contributed by Linaro Ltd.
  4. This file is part of GCC.
  5. GCC is free software; you can redistribute it and/or modify it under
  6. the terms of the GNU General Public License as published by the Free
  7. Software Foundation; either version 3, or (at your option) any later
  8. version.
  9. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  12. for more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  20. /*
  21. * The problem that we are trying to solve is operating system deployment
  22. * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
  23. *
  24. * There are a number of potential solutions for this problem which have
  25. * been proposed and rejected for various reasons. To recap:
  26. *
  27. * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/
  28. * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
  29. * However, not all Linux distributions are happy with multiple builds,
  30. * and anyway it has no effect on main applications.
  31. *
  32. * (2) IFUNC. We could put these functions into libgcc_s.so, and have
  33. * a single copy of each function for all DSOs. However, ARM is concerned
  34. * that the branch-to-indirect-branch that is implied by using a PLT,
  35. * as required by IFUNC, is too much overhead for smaller cpus.
  36. *
  37. * (3) Statically predicted direct branches. This is the approach that
  38. * is taken here. These functions are linked into every DSO that uses them.
  39. * All of the symbols are hidden, so that the functions are called via a
  40. * direct branch. The choice of LSE vs non-LSE is done via one byte load
  41. * followed by a well-predicted direct branch. The functions are compiled
  42. * separately to minimize code size.
  43. */
  44. #include "auto-target.h"
  /* Select the architecture the assembler should accept.  When the
     assembler knows the LSE mnemonics (HAVE_AS_LSE) they are enabled
     here; otherwise the routines in this file emit the LSE instructions
     as raw .inst words, so the base architecture is enough.  */
#ifndef HAVE_AS_LSE
        .arch   armv8-a
#else
        .arch   armv8-a+lse
#endif

/* The byte that gates the LSE implementations.  It is read by
   JUMP_IF_NOT_LSE and is referenced here as a hidden symbol.  */
        .hidden __aarch64_have_lse_atomics
  /* Turn the SIZE define into mnemonic fragments:

       S    size suffix pasted onto the exclusive and LSE mnemonics
            (b, h, or nothing),
       UXT  instruction the CAS fallback uses to copy the expected value
            into a scratch register (zero-extending for sub-word sizes),
       B    size bits added to the hand-encoded .inst words.

     B is not defined for SIZE == 16: the only 16-byte routine in this
     file (CASP) builds its .inst word without it.  */
#if SIZE == 1
# define S      b
# define UXT    uxtb
# define B      0x00000000
#elif SIZE == 2
# define S      h
# define UXT    uxth
# define B      0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT    mov
# if SIZE == 4
#  define B     0x80000000
# elif SIZE == 8
#  define B     0xc0000000
# endif
#else
# error "SIZE must be defined as 1, 2, 4, 8 or 16"
#endif
  /* Turn the MODEL define into mnemonic fragments:

       SUFF  suffix of the function name built by NAME(),
       A     acquire letter pasted into the mnemonics (a, or nothing),
       L     release letter pasted into the mnemonics (l, or nothing),
       M     memory-order bits added to the CAS/CASP .inst words,
       N     memory-order bits added to the SWP/LDOP .inst words.  */
#if MODEL == 1
# define SUFF   _relax
# define A
# define L
# define M      0x000000
# define N      0x000000
#elif MODEL == 2
# define SUFF   _acq
# define A      a
# define L
# define M      0x400000
# define N      0x800000
#elif MODEL == 3
# define SUFF   _rel
# define A
# define L      l
# define M      0x008000
# define N      0x400000
#elif MODEL == 4
# define SUFF   _acq_rel
# define A      a
# define L      l
# define M      0x408000
# define N      0xc00000
#else
# error "MODEL must be defined as 1 (relax), 2 (acq), 3 (rel) or 4 (acq_rel)"
#endif
  /* Token-pasting helpers.  Each one is defined in two levels so that
     the arguments are macro-expanded before they are pasted.  */
#define glue2_(P, Q)            P ## Q
#define glue2(P, Q)             glue2_(P, Q)
#define glue3_(P, Q, R)         P ## Q ## R
#define glue3(P, Q, R)          glue3_(P, Q, R)
#define glue4_(P, Q, R, T)      P ## Q ## R ## T
#define glue4(P, Q, R, T)       glue4_(P, Q, R, T)

/* Build a register name from a register number: w(n) and x(n) give the
   32-bit and 64-bit views, s(n) gives the view matching SIZE (w below
   8 bytes, x otherwise).  */
#define w(REGNO)                glue2(w, REGNO)
#define x(REGNO)                glue2(x, REGNO)
#if SIZE >= 8
# define s(REGNO)               x(REGNO)
#else
# define s(REGNO)               w(REGNO)
#endif

/* Function name: __aarch64_<BASE><SIZE><SUFF>.  */
#define NAME(BASE)              glue4(__aarch64_, BASE, SIZE, SUFF)

/* Load/store-exclusive mnemonics with the acquire/release letters and
   the size suffix selected by MODEL and SIZE.  */
#define LDXR                    glue4(ld, A, xr, S)
#define STXR                    glue4(st, L, xr, S)
  /* Scratch register numbers (used through the x()/w()/s() selectors).
     Apart from these, the routines modify only the return value
     register (x0) and the flags.  */
#define tmp2    15
#define tmp0    16
#define tmp1    17

/* Landing pad placed at every function entry by STARTFN.  HINT #34 is
   the encoding of "bti c"; spelling it as a hint keeps the file
   assemblable without enabling the BTI extension.  */
#define BTI_C   hint    34
  /* STARTFN name
     Open function NAME: switch to .text, align to 16 bytes, declare the
     symbol global but hidden and of function type, start the CFI
     region, define the label and emit the BTI landing pad.
     Every STARTFN must be closed by a matching ENDFN.  */
.macro  STARTFN name
        .text
        .p2align 4
        .global \name
        .hidden \name
        .type   \name, %function
        .cfi_startproc
\name:
        BTI_C
.endm
  /* ENDFN name
     Close the function opened by STARTFN: end the CFI region and record
     the symbol size as the distance from the label to this point.  */
.macro  ENDFN name
        .cfi_endproc
        .size   \name, . - \name
.endm
  /* JUMP_IF_NOT_LSE label
     Load the byte __aarch64_have_lse_atomics (page address via adrp,
     low 12 bits via :lo12:) and branch to LABEL when it is zero, i.e.
     when the LSE implementation must not be used.  Falls through
     otherwise.  Clobbers x(tmp0); the flags are left untouched.  */
.macro  JUMP_IF_NOT_LSE label
        adrp    x(tmp0), __aarch64_have_lse_atomics
        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
        cbz     w(tmp0), \label
.endm
  /* Compare and swap.

     SIZE < 16:   s(0) = expected value, s(1) = new value, x2 = address.
                  The value found in memory is returned in s(0).
     SIZE == 16:  x0:x1 = expected value, x2:x3 = new value, x4 = address.
                  The value found in memory is returned in x0:x1.

     The new value is stored only when memory matched the expected
     value.  Scratch registers: tmp0, tmp1 (and tmp2 for SIZE == 16).  */
#ifdef L_cas

STARTFN NAME(cas)
        JUMP_IF_NOT_LSE 8f

#if SIZE < 16
#ifdef HAVE_AS_LSE
# define CAS    glue4(cas, A, L, S)     s(0), s(1), [x2]
#else
# define CAS    .inst 0x08a07c41 + B + M
#endif

        CAS             /* s(0), s(1), [x2] */
        ret

        /* Fallback: load/store-exclusive loop.  UXT zero-extends the
           expected value for sub-word sizes so that it compares equal
           to the zero-extended result of the exclusive load.  */
8:      UXT             s(tmp0), s(0)
0:      LDXR            s(0), [x2]
        cmp             s(0), s(tmp0)
        bne             1f
        STXR            w(tmp1), s(1), [x2]
        cbnz            w(tmp1), 0b     /* Lost the reservation: retry.  */
1:      ret

#else
#define LDXP    glue3(ld, A, xp)
#define STXP    glue3(st, L, xp)
#ifdef HAVE_AS_LSE
# define CASP   glue3(casp, A, L)       x0, x1, x2, x3, [x4]
#else
# define CASP   .inst 0x48207c82 + M
#endif

        CASP            /* x0, x1, x2, x3, [x4] */
        ret

        /* Fallback: load/store-exclusive pair loop.  A 128-bit LDXP is
           only guaranteed to have read both halves atomically when the
           paired STXP succeeds.  So on a mismatch the loaded value is
           stored back unchanged, and the loop retries if that store
           fails, instead of returning a possibly torn value.  */
8:      mov             x(tmp0), x0
        mov             x(tmp1), x1
0:      LDXP            x0, x1, [x4]
        cmp             x0, x(tmp0)
        ccmp            x1, x(tmp1), #0, eq
        bne             1f
        STXP            w(tmp2), x2, x3, [x4]   /* Match: store new value.  */
        cbnz            w(tmp2), 0b
        ret
1:      STXP            w(tmp2), x0, x1, [x4]   /* Mismatch: write back as read.  */
        cbnz            w(tmp2), 0b
        ret

#endif

ENDFN   NAME(cas)
#endif
  /* Atomic exchange.
     s(0) = value to store, x1 = address.  The previous contents of
     memory are returned in s(0).  Scratch registers: tmp0, tmp1.  */
#ifdef L_swp

#ifndef HAVE_AS_LSE
# define SWP    .inst 0x38208020 + B + N
#else
# define SWP    glue4(swp, A, L, S)     s(0), s(0), [x1]
#endif

STARTFN NAME(swp)
        JUMP_IF_NOT_LSE 8f

        SWP             /* s(0), s(0), [x1] */
        ret

        /* Fallback: keep the new value in tmp0 and loop until the
           exclusive store succeeds.  */
8:      mov             s(tmp0), s(0)
0:      LDXR            s(0), [x1]
        STXR            w(tmp1), s(tmp0), [x1]
        cbnz            w(tmp1), 0b
        ret

ENDFN   NAME(swp)
#endif
  /* Atomic fetch-and-op (ldadd, ldclr, ldeor, ldset).
     s(0) = operand, x1 = address.  Memory is updated with
     "old OP operand" and the old contents are returned in s(0).
     Scratch registers: tmp0, tmp1, tmp2.

       LDNM  LSE mnemonic stem, also used in the function name,
       OP    data-processing instruction used by the fallback loop,
       OPN   opcode bits added to the hand-encoded .inst word.  */
#if defined(L_ldadd) || defined(L_ldclr) || defined(L_ldeor) || defined(L_ldset)

#if defined(L_ldadd)
# define LDNM   ldadd
# define OP     add
# define OPN    0x0000
#elif defined(L_ldclr)
# define LDNM   ldclr
# define OP     bic
# define OPN    0x1000
#elif defined(L_ldeor)
# define LDNM   ldeor
# define OP     eor
# define OPN    0x2000
#elif defined(L_ldset)
# define LDNM   ldset
# define OP     orr
# define OPN    0x3000
#else
# error
#endif

#ifndef HAVE_AS_LSE
# define LDOP   .inst 0x38200020 + OPN + B + N
#else
# define LDOP   glue4(LDNM, A, L, S)    s(0), s(0), [x1]
#endif

STARTFN NAME(LDNM)
        JUMP_IF_NOT_LSE 8f

        LDOP            /* s(0), s(0), [x1] */
        ret

        /* Fallback: operand kept in tmp0, new memory value computed in
           tmp1, store status in tmp2; retry until the store succeeds.  */
8:      mov             s(tmp0), s(0)
0:      LDXR            s(0), [x1]
        OP              s(tmp1), s(0), s(tmp0)
        STXR            w(tmp2), s(tmp1), [x1]
        cbnz            w(tmp2), 0b
        ret

ENDFN   NAME(LDNM)
#endif
  /* Values of the GNU_PROPERTY_AARCH64_* macros from elf.h, repeated
     here so that they can be used from assembly.  */
#define FEATURE_1_AND   0xc0000000
#define FEATURE_1_BTI   1
#define FEATURE_1_PAC   2

/* Feature bits to advertise, derived from the code generation options:
   BTI when __ARM_FEATURE_BTI_DEFAULT is defined, PAC when either of
   the two low bits of __ARM_FEATURE_PAC_DEFAULT is set.  */
#ifdef __ARM_FEATURE_BTI_DEFAULT
# define BTI_FLAG       FEATURE_1_BTI
#else
# define BTI_FLAG       0
#endif

#if (__ARM_FEATURE_PAC_DEFAULT & 3) != 0
# define PAC_FLAG       FEATURE_1_PAC
#else
# define PAC_FLAG       0
#endif
  /* GNU_PROPERTY (type, value)
     Emit an NT_GNU_PROPERTY_TYPE_0 note into .note.gnu.property,
     aligned to 8 bytes.  The words are, in order: name size (4),
     descriptor size (16), note type (5), the name "GNU", then the
     property itself: TYPE, data size (4), VALUE and a zero word that
     fills the 16-byte descriptor.  */
#define GNU_PROPERTY(type, value)               \
        .section .note.gnu.property, "a";       \
        .balign 8;                              \
        .word 4;                                \
        .word 16;                               \
        .word 5;                                \
        .asciz "GNU";                           \
        .word type;                             \
        .word 4;                                \
        .word value;                            \
        .word 0;
  /* On Linux and FreeBSD, emit an empty .note.GNU-stack section and,
     when the file is built with branch protection (BTI and/or PAC),
     a GNU property note advertising those features.  */
#if defined(__linux__) || defined(__FreeBSD__)
        .section .note.GNU-stack, "", %progbits

# if (BTI_FLAG | PAC_FLAG)
GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
# endif
#endif