lib1funcs.S 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. /* libgcc routines for C-SKY.
  2. Copyright (C) 2018-2022 Free Software Foundation, Inc.
  3. Contributed by C-SKY Microsystems and Mentor Graphics.
  4. This file is part of GCC.
  5. GCC is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by the
  7. Free Software Foundation; either version 3, or (at your option) any
  8. later version.
  9. This file is distributed in the hope that it will be useful, but
  10. WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. General Public License for more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  /* Use the right prefix for global labels.
     SYM (x) yields x with "__" pasted in front.  The paste is routed
     through two macro levels (CONCAT1 -> CONCAT2) so that arguments are
     macro-expanded before ## is applied.  */
  #define CONCAT1(a, b) CONCAT2(a, b)
  #define CONCAT2(a, b) a ## b
  #define SYM(x) CONCAT1 (__, x)
  /* Names for the halves of the register pairs r0/r1 (x) and r2/r3 (y).
     When __CSKYBE__ is defined the "h" half is the lower-numbered
     register of each pair; otherwise it is the higher-numbered one.  */
  #ifdef __CSKYBE__
  #define xh r0
  #define xl r1
  #define yh r2
  #define yl r3
  #else
  #define xl r0
  #define xh r1
  #define yl r2
  #define yh r3
  #endif
  /* Symbol metadata directives.  With __ELF__ defined, TYPE (x) marks
     SYM (x) as a function and SIZE (x) sets its size to the distance
     from SYM (x) to the current location.  Without __ELF__ both macros
     expand to nothing.  */
  #ifndef __ELF__
  #define TYPE(x)
  #define SIZE(x)
  #else
  #define TYPE(x) .type SYM (x),@function
  #define SIZE(x) .size SYM (x), . - SYM (x)
  #endif
  /* FUNC_START name: switch to .text, align, and define the global
     label SYM (name) with its TYPE annotation.  */
  .macro FUNC_START name
          .text
          .align 2
          TYPE (\name)
          .global SYM (\name)
  SYM (\name):
  .endm
  /* FUNC_END name: emit the SIZE directive for SYM (name).  Expands to
     nothing when SIZE is defined empty.  */
  .macro FUNC_END name
          SIZE (\name)
  .endm
  /* FF1_M rx, ry: rx = ff1 ("fast find 1") of ry.
     Everything except ck801 uses the ff1 instruction itself.  The ck801
     variant emulates it: rx starts at 32 and drops by one for each
     right shift needed to reduce ry to zero, so ry ends up zero there.
     The emulation uses numeric local labels 10 and 11.  */
  .macro FF1_M rx, ry
  #if !defined(__CK801__)
          ff1     \rx, \ry
  #else
          movi    \rx, 32
  10:
          cmphsi  \ry, 1          // is ry still non-zero?
          bf      11f             // no: rx holds the result
          lsri    \ry, \ry, 1
          subi    \rx, \rx, 1
          br      10b
  11:
  #endif
  .endm
  /* LSLC_M rx: lslc ("logical left shift to C") on rx.
     The ck801 variant replaces it with two instructions: a compare of
     rx against itself for inequality (never true), then an
     add-with-carry of rx to itself.  */
  .macro LSLC_M rx
  #ifdef __CK801__
          cmpne   \rx, \rx        // rx != rx never holds
          addc    \rx, \rx        // rx + rx + carry
  #else
          lslc    \rx
  #endif
  .endm
  /* ABS_M rx: absolute value of rx, in place.
     ck802 and ck801 emulate the abs instruction: a test selects the
     values to negate (bit 31 set on ck802, rx < 1 on ck801) and those
     are negated as (~rx) + 1.  All other cores use abs directly.
     The emulation uses numeric local label 10.  */
  .macro ABS_M rx
  #if defined(__CK802__) || defined(__CK801__)
  #if defined(__CK802__)
          btsti   \rx, 31         // test the sign bit
  #else
          cmplti  \rx, 1          // test rx < 1
  #endif
          bf      10f             // test failed: leave rx as it is
          not     \rx
          addi    \rx, 1
  10:
  #else
          abs     \rx
  #endif
  .endm
  /* Emulate the ld.bs and ld.hs ("load signed byte/halfword and extend")
  instructions on ck801. */
  /* LDBS_M rx, ry: load the byte at (ry, 0) into rx with sign extension.
     ck801 uses ld.b followed by sextb; other cores use ld.bs.  */
  .macro LDBS_M rx, ry
  #ifndef __CK801__
          ld.bs   \rx, (\ry, 0x0)
  #else
          ld.b    \rx, (\ry, 0x0)
          sextb   \rx, \rx
  #endif
  .endm
  /* LDHS_M rx, ry: load the halfword at (ry, 0) into rx with sign
     extension.  ck801 uses ld.h followed by sexth; other cores use
     ld.hs.  */
  .macro LDHS_M rx, ry
  #ifndef __CK801__
          ld.hs   \rx, (\ry, 0x0)
  #else
          ld.h    \rx, (\ry, 0x0)
          sexth   \rx, \rx
  #endif
  .endm
  125. /* Signed and unsigned div/mod/rem functions. */
  /* Unsigned 32-bit division, defined under both __udiv32 and __udivsi3.
     In:  a0 = dividend, a1 = divisor.
     Out: a0 = quotient; on the main exit path a1 is loaded from a3, the
          working register the code comments call the remainder.
     A zero divisor executes trap 3.  A zero dividend returns at once
     through jmp lr with a0 unchanged.  Otherwise l0 is pushed and used
     as a shift counter, and the routine ends with pop l0.  No separate
     return follows that pop -- NOTE(review): presumably pop also
     returns on this ISA; confirm against the instruction set manual.
     The quotient is collected in a2, which starts as a single sentinel
     1 bit; the loop stops when that bit is shifted out as a carry.  */
  #ifdef L_udivsi3
  FUNC_START udiv32
  FUNC_START udivsi3
          cmpnei  a1, 0           // look for 0 divisor
          bt      9f
          trap    3               // divide by 0
  9:
          // control iterations, skip across high order 0 bits in dividend
          cmpnei  a0, 0
          bt      8f
          jmp     lr              // 0 dividend quick return
  8:
          push    l0
          movi    a2, 1           // a2 is quotient (1 for a sentinel)
          mov     a3, a0          // work on a copy: FF1_M may clobber its source
          FF1_M   l0, a3          // figure distance to skip
          lsl     a2, l0          // move the sentinel along (with zeros behind)
          lsl     a0, l0          // and the low 32 bits of numerator
          // FIXME: Is this correct?
          mov     a3, a1          // looking at divisor
          FF1_M   l0, a3          // I can move 32-l0 more bits to left.
          addi    l0, 1           // ok, one short of that...
          mov     a3, a0
          lsr     a3, l0          // bits that came from low order...
          not     l0              // l0 == "32-n" == LEFT distance
          addi    l0, 33          // this is (32-n): ~n + 33 == 32 - n
          lsl     a2,l0           // fixes the high 32 (quotient)
          lsl     a0,l0
          cmpnei  a2,0
          bf      4f              // the sentinel went away...
          // run the remaining bits
  1:
          LSLC_M  a0              // 1 bit left shift of a3-a0
          addc    a3, a3
          cmphs   a3, a1          // upper 32 of dividend >= divisor?
          bf      2f
          subu    a3, a1          // if yes, subtract divisor
  2:
          addc    a2, a2          // shift by 1 and count subtracts
          bf      1b              // if sentinel falls out of quotient, stop
  4:
          mov     a0, a2          // return quotient
          mov     a1, a3          // and piggyback the remainder
          pop     l0
  FUNC_END udiv32
  FUNC_END udivsi3
  #endif
  /* Unsigned 32-bit remainder, defined under both __urem32 and
     __umodsi3.
     In:  a0 = dividend, a1 = divisor.
     Out: a0 = remainder.
     A zero divisor executes trap 3.  A zero dividend returns at once
     with a0 unchanged.  Only a0-a3 are used, so nothing is saved on the
     stack.  a3 starts at zero and receives the dividend one bit at a
     time from the top of a0; the divisor is subtracted whenever a3 is
     at least as large.  a2 holds a sentinel 1 bit that ends the loop
     when it is shifted out as a carry.  */
  #ifdef L_umodsi3
  FUNC_START urem32
  FUNC_START umodsi3
          cmpnei  a1, 0           // look for 0 divisor
          bt      9f
          trap    3               // divide by 0
  9:
          // control iterations, skip across high order 0 bits in dividend
          cmpnei  a0, 0
          bt      8f
          jmp     lr              // 0 dividend quick return
  8:
          mov     a2, a0          // work on a copy: FF1_M may clobber its source
          FF1_M   a3, a2          // figure distance to skip
          movi    a2, 1           // a2 is quotient (1 for a sentinel)
          lsl     a2, a3          // move the sentinel along (with zeros behind)
          lsl     a0, a3          // and the low 32 bits of numerator
          movi    a3, 0           // a3 accumulates the running remainder
  1:
          LSLC_M  a0              // 1 bit left shift of a3-a0
          addc    a3, a3
          cmphs   a3, a1          // upper 32 of dividend >= divisor?
          bf      2f
          subu    a3, a1          // if yes, subtract divisor
  2:
          addc    a2, a2          // shift by 1 and count subtracts
          bf      1b              // if sentinel falls out of quotient, stop
  4:
          mov     a0, a3          // and piggyback the remainder
          jmp     lr
  FUNC_END urem32
  FUNC_END umodsi3
  #endif
  /* Signed 32-bit division, defined under both __div32 and __divsi3.
     In:  a0 = dividend, a1 = divisor.
     Out: a0 = quotient; on the main exit path a1 is loaded from a3, the
          working register the code comments call the remainder.
     A zero divisor executes trap 3.  A zero dividend returns at once
     through jmp lr with a0 unchanged.  Otherwise l0 and l1 are pushed.
     l1 keeps a0 XOR a1, so its top bit is the sign of the quotient.
     Both operands are replaced by their absolute values and divided
     with the same shift-and-subtract scheme as the unsigned routine.
     If the top bit of l1 was set, a0 and a1 are both negated before
     returning.  The routine ends with pop l0, l1 and no separate
     return -- NOTE(review): presumably pop also returns on this ISA;
     confirm against the instruction set manual.  */
  #ifdef L_divsi3
  FUNC_START div32
  FUNC_START divsi3
          cmpnei  a1, 0           // look for 0 divisor
          bt      9f
          trap    3               // divide by 0
  9:
          // control iterations, skip across high order 0 bits in dividend
          cmpnei  a0, 0
          bt      8f
          jmp     lr              // 0 dividend quick return
  8:
          push    l0, l1
          mov     l1, a0
          xor     l1, a1          // calc sign of quotient
          ABS_M   a0
          ABS_M   a1
          movi    a2, 1           // a2 is quotient (1 for a sentinel)
          mov     a3, a0          // work on a copy: FF1_M may clobber its source
          FF1_M   l0, a3          // figure distance to skip
          lsl     a2, l0          // move the sentinel along (with zeros behind)
          lsl     a0, l0          // and the low 32 bits of numerator
          // FIXME: is this correct?
          mov     a3, a1          // looking at divisor
          FF1_M   l0, a3          // I can move 32-l0 more bits to left.
          addi    l0, 1           // ok, one short of that...
          mov     a3, a0
          lsr     a3, l0          // bits that came from low order...
          not     l0              // l0 == "32-n" == LEFT distance
          addi    l0, 33          // this is (32-n): ~n + 33 == 32 - n
          lsl     a2,l0           // fixes the high 32 (quotient)
          lsl     a0,l0
          cmpnei  a2,0
          bf      4f              // the sentinel went away...
          // run the remaining bits
  1:
          LSLC_M  a0              // 1 bit left shift of a3-a0
          addc    a3, a3
          cmphs   a3, a1          // upper 32 of dividend >= divisor?
          bf      2f
          subu    a3, a1          // if yes, subtract divisor
  2:
          addc    a2, a2          // shift by 1 and count subtracts
          bf      1b              // if sentinel falls out of quotient, stop
  4:
          mov     a0, a2          // return quotient
          mov     a1, a3          // and piggyback the remainder
          LSLC_M  l1              // after adjusting for sign
          bf      3f              // top bit of l1 clear: results stay as they are
          not     a0              // negate the quotient: (~a0) + 1
          addi    a0, 1
          not     a1              // negate the piggybacked value the same way
          addi    a1, 1
  3:
          pop     l0, l1
  FUNC_END div32
  FUNC_END divsi3
  #endif
  /* Signed 32-bit remainder, defined under both __rem32 and __modsi3.
     In:  a0 = dividend, a1 = divisor.
     Out: a0 = remainder, negated when the original dividend had its
          top bit set.
     l0 is pushed on entry, before the operand checks, so every exit
     goes through pop l0.  No separate return follows either pop --
     NOTE(review): presumably pop also returns on this ISA; confirm
     against the instruction set manual.
     A zero divisor executes trap 3.  A zero dividend takes the early
     pop with a0 unchanged.  Otherwise l0 keeps the original dividend
     for its sign, both operands are replaced by their absolute values,
     and the remainder is built in a3 by shift-and-subtract.  */
  #ifdef L_modsi3
  FUNC_START rem32
  FUNC_START modsi3
          push    l0
          cmpnei  a1, 0           // look for 0 divisor
          bt      9f
          trap    3               // divide by 0
  9:
          // control iterations, skip across high order 0 bits in dividend
          cmpnei  a0, 0
          bt      8f
          pop     l0              // 0 dividend quick return
  8:
          mov     l0, a0          // remember the dividend for its sign
          ABS_M   a0
          ABS_M   a1
          mov     a2, a0          // work on a copy: FF1_M may clobber its source
          FF1_M   a3, a2          // figure distance to skip
          movi    a2, 1           // a2 is quotient (1 for a sentinel)
          lsl     a2, a3          // move the sentinel along (with zeros behind)
          lsl     a0, a3          // and the low 32 bits of numerator
          movi    a3, 0           // a3 accumulates the running remainder
          // run the remaining bits
  1:
          LSLC_M  a0              // 1 bit left shift of a3-a0
          addc    a3, a3
          cmphs   a3, a1          // upper 32 of dividend >= divisor?
          bf      2f
          subu    a3, a1          // if yes, subtract divisor
  2:
          addc    a2, a2          // shift by 1 and count subtracts
          bf      1b              // if sentinel falls out of quotient, stop
  4:
          mov     a0, a3          // and piggyback the remainder
          LSLC_M  l0              // after adjusting for sign
          bf      3f              // top bit of l0 clear: remainder stays as it is
          not     a0              // negate the remainder: (~a0) + 1
          addi    a0, 1
  3:
          pop     l0
  FUNC_END rem32
  FUNC_END modsi3
  #endif
  307. /* Unordered comparisons for single and double float. */
  /* __unordsf2: unordered test for two single-precision values.
     In:  r0, r1 = the two operands as raw 32-bit patterns.
     Out: r0 = 1 if either operand has all eight exponent bits set and a
          non-zero fraction field, otherwise r0 = 0.
     Each operand is checked the same way: shift the sign bit out,
     arithmetic-shift the exponent down and invert it (zero only when
     the exponent is all ones), then shift left by 9 to isolate the
     fraction.  The ck801 variant saves and restores r4 on the stack as
     scratch; the other variants use r13 and overwrite r2 and r3.
     Local labels: 5 = check second operand, 6 = return 0,
     7 = return 1.  */
  #ifdef L_unordsf2
  FUNC_START unordsf2
  #if defined(__CK801__)
          subi    sp, 4
          st.w    r4, (sp, 0x0)
          lsli    r3, r1, 1
          lsli    r2, r0, 1
          asri    r4, r2, 24
          not     r4
          cmpnei  r4, 0
          bt      5f              // first exponent not all ones
          lsli    r4, r0, 9
          cmpnei  r4, 0
          bt      7f              // first operand has a non-zero fraction
  5:
          asri    r4, r3, 24
          not     r4
          cmpnei  r4, 0
          bt      6f              // second exponent not all ones
          lsli    r4, r1, 9
          cmpnei  r4, 0
          bt      7f              // second operand has a non-zero fraction
  6:
          movi    r0, 0
          ld.w    r4, (sp, 0x0)
          addi    sp, 4
          rts
  7:
          movi    r0, 1
          ld.w    r4, (sp, 0x0)
          addi    sp, 4
          rts
  #elif defined(__CK802__)
          lsli    r3, r1, 1
          lsli    r2, r0, 1
          asri    r2, r2, 24
          not     r13, r2
          cmpnei  r13, 0
          bt      5f              // first exponent not all ones
          lsli    r13, r0, 9
          cmpnei  r13, 0
          bt      7f              // first operand has a non-zero fraction
  5:
          asri    r3, r3, 24
          not     r13, r3
          cmpnei  r13, 0
          bt      6f              // second exponent not all ones
          lsli    r13, r1, 9
          cmpnei  r13, 0
          bt      7f              // second operand has a non-zero fraction
  6:
          movi    r0, 0
          rts
  7:
          movi    r0, 1
          rts
  #else
          lsli    r3, r1, 1
          lsli    r2, r0, 1
          asri    r2, r2, 24
          not     r13, r2
          bnez    r13, 5f         // first exponent not all ones
          lsli    r13, r0, 9
          bnez    r13, 7f         // first operand has a non-zero fraction
  5:
          asri    r3, r3, 24
          not     r13, r3
          bnez    r13, 6f         // second exponent not all ones
          lsli    r13, r1, 9
          bnez    r13, 7f         // second operand has a non-zero fraction
  6:
          movi    r0, 0
          rts
  7:
          movi    r0, 1
          rts
  #endif
  FUNC_END unordsf2
  #endif
  /* __unorddf2: unordered test for two double-precision values.
     In:  xh:xl and yh:yl = the two operands as raw register pairs.
     Out: r0 = 1 if either operand has all eleven exponent bits set and
          a non-zero fraction field, otherwise r0 = 0.
     Each operand is checked the same way: shift the sign bit out of the
     high word, arithmetic-shift the exponent down and invert it (zero
     only when the exponent is all ones), then OR the low word with the
     high word shifted left by 12 to collect the fraction bits.
     The ck801 variant saves and restores r4 and r5 on the stack as
     scratch; the other variants use r13 and overwrite xh and yh.
     Local labels: 5 = check second operand, 6 = return 0,
     7 = return 1.  */
  #ifdef L_unorddf2
  FUNC_START unorddf2
  #if defined(__CK801__)
          subi    sp, 8
          st.w    r5, (sp, 0x4)
          st.w    r4, (sp, 0x0)
          lsli    r4, xh, 1
          asri    r4, r4, 21
          not     r4
          cmpnei  r4, 0
          bt      5f              // first exponent not all ones
          lsli    r5, xh, 12
          mov     r4, xl
          or      r4, r5
          cmpnei  r4, 0
          bt      7f              // first operand has a non-zero fraction
  5:
          lsli    r4, yh, 1
          asri    r4, r4, 21
          not     r4
          cmpnei  r4, 0
          bt      6f              // second exponent not all ones
          lsli    r5, yh, 12
          mov     r4, yl
          or      r4, r5
          cmpnei  r4, 0
          bt      7f              // second operand has a non-zero fraction
  6:
          movi    r0, 0
          ld.w    r5, (sp, 0x4)
          ld.w    r4, (sp, 0x0)
          addi    sp, 8
          rts
  7:
          movi    r0, 1
          ld.w    r5, (sp, 0x4)
          ld.w    r4, (sp, 0x0)
          addi    sp, 8
          rts
  #elif defined(__CK802__)
          lsli    r13, xh, 1
          asri    r13, r13, 21
          not     r13
          cmpnei  r13, 0
          bt      5f              // first exponent not all ones
          lsli    xh, xh, 12
          or      r13, xh, xl
          cmpnei  r13, 0
          bt      7f              // first operand has a non-zero fraction
  5:
          lsli    r13, yh, 1
          asri    r13, r13, 21
          not     r13
          cmpnei  r13, 0
          bt      6f              // second exponent not all ones
          lsli    yh, yh, 12
          or      r13, yh, yl
          cmpnei  r13, 0
          bt      7f              // second operand has a non-zero fraction
  6:
          movi    r0, 0
          rts
  7:
          movi    r0, 1
          rts
  #else
          lsli    r13, xh, 1
          asri    r13, r13, 21
          not     r13
          bnez    r13, 5f         // first exponent not all ones
          lsli    xh, xh, 12
          or      r13, xh, xl
          bnez    r13, 7f         // first operand has a non-zero fraction
  5:
          lsli    r13, yh, 1
          asri    r13, r13, 21
          not     r13
          bnez    r13, 6f         // second exponent not all ones
          lsli    yh, yh, 12
          or      r13, yh, yl
          bnez    r13, 7f         // second operand has a non-zero fraction
  6:
          movi    r0, 0
          rts
  7:
          movi    r0, 1
          rts
  #endif
  FUNC_END unorddf2
  #endif
  477. /* When optimizing for size on ck801 and ck802, GCC emits calls to the
  478. following helper functions when expanding casesi, instead of emitting
  479. the table lookup and jump inline. Note that in these functions the
  480. jump is handled by tweaking the value of lr before rts. */
  /* casesi helper for a table of signed byte entries.
     In:  a0 = index, lr = address of the table (entry i is the byte at
          lr + i).
     Effect: lr += 2 * sign-extended entry; the rts then continues at
          the adjusted address.  a1 is used as scratch and is saved and
          restored on the stack; a0 is not modified.  */
  #ifdef L_csky_case_sqi
  FUNC_START _gnu_csky_case_sqi
          subi    sp, 4
          st.w    a1, (sp, 0x0)   // preserve a1 for the caller
          mov     a1, lr
          add     a1, a1, a0      // a1 = address of entry a0
          LDBS_M  a1, a1          // a1 = entry, sign-extended
          lsli    a1, a1, 1       // entry is doubled before use
          add     lr, lr, a1
          ld.w    a1, (sp, 0x0)
          addi    sp, 4
          rts
  FUNC_END _gnu_csky_case_sqi
  #endif
  /* casesi helper for a table of byte entries loaded with a plain ld.b
     (no sign-extension step, unlike the sqi variant).
     In:  a0 = index, lr = address of the table (entry i is the byte at
          lr + i).
     Effect: lr += 2 * entry; the rts then continues at the adjusted
          address.  a1 is used as scratch and is saved and restored on
          the stack; a0 is not modified.  */
  #ifdef L_csky_case_uqi
  FUNC_START _gnu_csky_case_uqi
          subi    sp, 4
          st.w    a1, (sp, 0x0)   // preserve a1 for the caller
          mov     a1, lr
          add     a1, a1, a0      // a1 = address of entry a0
          ld.b    a1, (a1, 0x0)   // a1 = entry
          lsli    a1, a1, 1       // entry is doubled before use
          add     lr, lr, a1
          ld.w    a1, (sp, 0x0)
          addi    sp, 4
          rts
  FUNC_END _gnu_csky_case_uqi
  #endif
  /* casesi helper for a table of signed halfword entries.
     In:  a0 = index, lr = address of the table (entry i is the halfword
          at lr + 2 * i).
     Effect: lr += 2 * sign-extended entry; the rts then continues at
          the adjusted address.  a0 and a1 are both used as scratch and
          restored from the stack before returning.  */
  #ifdef L_csky_case_shi
  FUNC_START _gnu_csky_case_shi
          subi    sp, 8
          st.w    a1, (sp, 0x0)
          st.w    a0, (sp, 0x4)
          lsli    a0, a0, 1       // index -> byte offset
          mov     a1, lr
          add     a1, a1, a0      // a1 = address of the entry
          LDHS_M  a1, a1          // a1 = entry, sign-extended
          lsli    a1, a1, 1       // entry is doubled before use
          add     lr, lr, a1
          ld.w    a1, (sp, 0x0)
          ld.w    a0, (sp, 0x4)
          addi    sp, 8
          rts
  FUNC_END _gnu_csky_case_shi
  #endif
  /* casesi helper for a table of halfword entries loaded with a plain
     ld.h (no sign-extension step, unlike the shi variant).
     In:  a0 = index, lr = address of the table (entry i is the halfword
          at lr + 2 * i).
     Effect: lr += 2 * entry; the rts then continues at the adjusted
          address.  a0 and a1 are both used as scratch and restored from
          the stack before returning.  */
  #ifdef L_csky_case_uhi
  FUNC_START _gnu_csky_case_uhi
          subi    sp, 8
          st.w    a1, (sp, 0x0)
          st.w    a0, (sp, 0x4)
          lsli    a0, a0, 1       // index -> byte offset
          mov     a1, lr
          add     a1, a1, a0      // a1 = address of the entry
          ld.h    a1, (a1, 0x0)   // a1 = entry
          lsli    a1, a1, 1       // entry is doubled before use
          add     lr, lr, a1
          ld.w    a1, (sp, 0x0)
          ld.w    a0, (sp, 0x4)
          addi    sp, 8
          rts
  FUNC_END _gnu_csky_case_uhi
  #endif
  /* casesi helper for a table of word entries.
     In:  a0 = index, lr = address just before the table.
     The table base is lr + 2 with bit 1 cleared (the word-alignment
     step), and lr is reset to that base.  Entry i is the word at
     base + 4 * i.
     Effect: lr = base + entry (the entry is not doubled here); the rts
          then continues at that address.  a0 and a1 are both used as
          scratch and restored from the stack before returning.  */
  #ifdef L_csky_case_si
  FUNC_START _gnu_csky_case_si
          subi    sp, 8
          st.w    a1, (sp, 0x0)
          st.w    a0, (sp, 0x4)
          lsli    a0, a0, 2       // index -> byte offset
          mov     a1, lr
          addi    a1, a1, 2       // Align to word.
          bclri   a1, a1, 1
          mov     lr, a1          // lr = table base
          add     a1, a1, a0      // a1 = address of the entry
          ld.w    a0, (a1, 0x0)   // a0 = entry
          add     lr, lr, a0
          ld.w    a1, (sp, 0x0)
          ld.w    a0, (sp, 0x4)
          addi    sp, 8
          rts
  FUNC_END _gnu_csky_case_si
  #endif
  562. /* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2}
  563. will behave as __cmpdf2. So, we stub the implementations to
  564. jump on to __cmpdf2 and __cmpsf2.
  565. All of these short-circuit the return path so that __cmp{sd}f2
  566. will go directly back to the caller. */
  /* COMPARE_DF_JUMP name: define the global function SYM (name) as a
     single jmpi to SYM (cmpdf2), which is declared with .import.
     Pass the bare name: FUNC_START adds the "__" prefix through SYM.  */
  .macro COMPARE_DF_JUMP name
          .import SYM (cmpdf2)
  FUNC_START \name
          jmpi SYM (cmpdf2)       // jump, not call: cmpdf2 returns to our caller
  FUNC_END \name
  .endm
  /* Double-precision comparison stubs.  Each one is emitted only when
     its L_<name> macro is defined and forwards to __cmpdf2.  */
  #if defined(L_eqdf2)
  COMPARE_DF_JUMP eqdf2
  #endif /* L_eqdf2 */
  #if defined(L_nedf2)
  COMPARE_DF_JUMP nedf2
  #endif /* L_nedf2 */
  #if defined(L_gtdf2)
  COMPARE_DF_JUMP gtdf2
  #endif /* L_gtdf2 */
  #if defined(L_gedf2)
  COMPARE_DF_JUMP gedf2
  #endif /* L_gedf2 */
  #if defined(L_ltdf2)
  COMPARE_DF_JUMP ltdf2
  #endif /* L_ltdf2 */
  #if defined(L_ledf2)
  COMPARE_DF_JUMP ledf2
  #endif /* L_ledf2 */
  591. /* Single-precision floating point stubs. */
  /* COMPARE_SF_JUMP name: define the global function SYM (name) as a
     single jmpi to SYM (cmpsf2), which is declared with .import.
     Pass the bare name: FUNC_START adds the "__" prefix through SYM.  */
  .macro COMPARE_SF_JUMP name
          .import SYM (cmpsf2)
  FUNC_START \name
          jmpi SYM (cmpsf2)       // jump, not call: cmpsf2 returns to our caller
  FUNC_END \name
  .endm
  /* Single-precision comparison stubs.  Each one is emitted only when
     its L_<name> macro is defined and forwards to __cmpsf2.
     COMPARE_SF_JUMP takes the name without the "__" prefix, because
     FUNC_START adds it through SYM.  The gesf2 and ltsf2 stubs used to
     pass "__gesf2" / "__ltsf2", which defined ____gesf2 / ____ltsf2
     rather than the intended __gesf2 / __ltsf2.  */
  #ifdef L_eqsf2
  COMPARE_SF_JUMP eqsf2
  #endif /* L_eqsf2 */
  #ifdef L_nesf2
  COMPARE_SF_JUMP nesf2
  #endif /* L_nesf2 */
  #ifdef L_gtsf2
  COMPARE_SF_JUMP gtsf2
  #endif /* L_gtsf2 */
  #ifdef L_gesf2
  COMPARE_SF_JUMP gesf2
  #endif /* L_gesf2 */
  #ifdef L_ltsf2
  COMPARE_SF_JUMP ltsf2
  #endif /* L_ltsf2 */
  #ifdef L_lesf2
  COMPARE_SF_JUMP lesf2
  #endif /* L_lesf2 */