- /* IEEE-754 single-precision functions for Xtensa
- Copyright (C) 2006-2022 Free Software Foundation, Inc.
- Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- GCC is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
- #ifdef __XTENSA_EB__
- #define xh a2
- #define xl a3
- #define yh a4
- #define yl a5
- #else
- #define xh a3
- #define xl a2
- #define yh a5
- #define yl a4
- #endif
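-
- /* The mapping of a 64-bit (DImode) value onto these register pairs
-    depends on endianness: the first register of a pair (a2 or a4)
-    holds the high-order word on big-endian targets and the low-order
-    word on little-endian targets. A C sketch of the convention (the
-    helper name is illustrative, not part of this file):
-
-        #include <stdint.h>
-
-        static void split_di(uint64_t v, uint32_t *xh_reg, uint32_t *xl_reg)
-        {
-            // xh gets the high word, xl the low word, regardless of
-            // which physical register each macro selects above.
-            *xh_reg = (uint32_t)(v >> 32);
-            *xl_reg = (uint32_t)v;
-        }
- */
-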
- /* Warning! The branch displacements for some Xtensa branch instructions
- are quite small, and this code has been carefully laid out to keep
- branch targets in range. If you change anything, be sure to check that
- the assembler is not relaxing anything to branch over a jump. */
- #ifdef L_negsf2
- .align 4
- .global __negsf2
- .type __negsf2, @function
- __negsf2:
- leaf_entry sp, 16
- movi a4, 0x80000000
- xor a2, a2, a4
- leaf_return
- #endif /* L_negsf2 */
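-
- /* Negation only flips bit 31; it never traps and works for zeros,
-    subnormals, NaNs and Infinities alike. An equivalent C sketch
-    (illustrative, not part of the build):
-
-        #include <stdint.h>
-        #include <string.h>
-
-        static float negsf2(float x)
-        {
-            uint32_t u;
-            memcpy(&u, &x, sizeof u);   // bit-level view of x
-            u ^= 0x80000000u;           // invert the sign bit
-            memcpy(&x, &u, sizeof u);
-            return x;
-        }
- */
-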
- #ifdef L_addsubsf3
- .literal_position
- /* Addition */
- __addsf3_aux:
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
- .Ladd_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall a3, a6, .Ladd_return_nan_or_inf
- /* If x is a NaN, return it. Otherwise, return y. */
- slli a7, a2, 9
- bnez a7, .Ladd_return_nan
- .Ladd_ynan_or_inf:
- /* Return y. */
- mov a2, a3
- .Ladd_return_nan_or_inf:
- slli a7, a2, 9
- bnez a7, .Ladd_return_nan
- leaf_return
- .Ladd_return_nan:
- movi a6, 0x400000 /* make it a quiet NaN */
- or a2, a2, a6
- leaf_return
- .Ladd_opposite_signs:
- /* Operand signs differ. Do a subtraction. */
- slli a7, a6, 8
- xor a3, a3, a7
- j .Lsub_same_sign
- .align 4
- .global __addsf3
- .type __addsf3, @function
- __addsf3:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- /* Check if the two operands have the same sign. */
- xor a7, a2, a3
- bltz a7, .Ladd_opposite_signs
- .Ladd_same_sign:
- /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
- ball a2, a6, .Ladd_xnan_or_inf
- ball a3, a6, .Ladd_ynan_or_inf
- /* Compare the exponents. The smaller operand will be shifted
- right by the exponent difference and added to the larger
- one. */
- extui a7, a2, 23, 9
- extui a8, a3, 23, 9
- bltu a7, a8, .Ladd_shiftx
- .Ladd_shifty:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone a3, a6, .Ladd_yexpzero
- /* Replace y sign/exponent with 0x008. */
- or a3, a3, a6
- slli a3, a3, 8
- srli a3, a3, 8
- .Ladd_yexpdiff:
- /* Compute the exponent difference. */
- sub a10, a7, a8
- /* Exponent difference >= 32 -- just return the bigger value. */
- bgeui a10, 32, 1f
-
- /* Shift y right by the exponent difference. Any bits that are
- shifted out of y are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, a3, a9
- srl a3, a3
- /* Do the addition. */
- add a2, a2, a3
- /* Check if the add overflowed into the exponent. */
- extui a10, a2, 23, 9
- beq a10, a7, .Ladd_round
- mov a8, a7
- j .Ladd_carry
- .Ladd_yexpzero:
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0", and increment the apparent exponent
- because subnormals behave as if they had the minimum (nonzero)
- exponent. Test for the case when both exponents are zero. */
- slli a3, a3, 9
- srli a3, a3, 9
- bnone a2, a6, .Ladd_bothexpzero
- addi a8, a8, 1
- j .Ladd_yexpdiff
- .Ladd_bothexpzero:
- /* Both exponents are zero. Handle this as a special case. There
- is no need to shift or round, and the normal code for handling
- a carry into the exponent field will not work because it
- assumes there is an implicit "1.0" that needs to be added. */
- add a2, a2, a3
- 1: leaf_return
- .Ladd_xexpzero:
- /* Same as "yexpzero" except skip handling the case when both
- exponents are zero. */
- slli a2, a2, 9
- srli a2, a2, 9
- addi a7, a7, 1
- j .Ladd_xexpdiff
- .Ladd_shiftx:
- /* Same thing as the "shifty" code, but with x and y swapped. Also,
- because the exponent difference is always nonzero in this version,
- the shift sequence can use SLL and skip loading a constant zero. */
- bnone a2, a6, .Ladd_xexpzero
- or a2, a2, a6
- slli a2, a2, 8
- srli a2, a2, 8
- .Ladd_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Ladd_returny
-
- ssr a10
- sll a9, a2
- srl a2, a2
- add a2, a2, a3
- /* Check if the add overflowed into the exponent. */
- extui a10, a2, 23, 9
- bne a10, a8, .Ladd_carry
- .Ladd_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi a2, a2, 1
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Ladd_exactlyhalf
- 1: leaf_return
- .Ladd_returny:
- mov a2, a3
- leaf_return
- .Ladd_carry:
- /* The addition has overflowed into the exponent field, so the
- value needs to be renormalized. The mantissa of the result
- can be recovered by subtracting the original exponent and
- adding 0x800000 (which is the explicit "1.0" for the
- mantissa of the non-shifted operand -- the "1.0" for the
- shifted operand was already added). The mantissa can then
- be shifted right by one bit. The explicit "1.0" of the
- shifted mantissa then needs to be replaced by the exponent,
- incremented by one to account for the normalizing shift.
- It is faster to combine these operations: do the shift first
- and combine the additions and subtractions. If x is the
- original exponent, the result is:
- shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
- or:
- shifted mantissa + ((x + 1) << 22)
- Note that the exponent is incremented here by leaving the
- explicit "1.0" of the mantissa in the exponent field. */
- /* Shift x right by one bit. Save the lsb. */
- mov a10, a2
- srli a2, a2, 1
- /* See explanation above. The original exponent is in a8. */
- addi a8, a8, 1
- slli a8, a8, 22
- add a2, a2, a8
- /* Return an Infinity if the exponent overflowed. */
- ball a2, a6, .Ladd_infinity
-
- /* Same thing as the "round" code except the msb of the leftover
- fraction is bit 0 of a10, with the rest of the fraction in a9. */
- bbci.l a10, 0, 1f
- addi a2, a2, 1
- beqz a9, .Ladd_exactlyhalf
- 1: leaf_return
- .Ladd_infinity:
- /* Clear the mantissa. */
- srli a2, a2, 23
- slli a2, a2, 23
- /* The sign bit may have been lost in a carry-out. Put it back. */
- slli a8, a8, 1
- or a2, a2, a8
- leaf_return
- .Ladd_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
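-
- /* The rounding used throughout this file is round-to-nearest with
-    ties to even: a9 holds the bits shifted out of the mantissa,
-    msb-aligned, so its sign bit is the "half" bit and the remaining
-    bits say whether anything beyond one half was lost. A C sketch of
-    the same rule (function name illustrative):
-
-        #include <stdint.h>
-
-        static uint32_t round_nearest_even(uint32_t value, uint32_t frac)
-        {
-            if (frac & 0x80000000u) {   // leftover fraction >= 1/2
-                value += 1;             // round up
-                if ((frac << 1) == 0)   // exactly 1/2: a tie
-                    value &= ~1u;       // round down to even
-            }
-            return value;
-        }
- */
-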
- /* Subtraction */
- __subsf3_aux:
-
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
- .Lsub_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall a3, a6, .Lsub_return_nan_or_inf
- /* Both x and y are either NaN or Inf, so the result is NaN. */
- .Lsub_return_nan:
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
- leaf_return
- .Lsub_ynan_or_inf:
- /* Negate y and return it. */
- slli a7, a6, 8
- xor a2, a3, a7
- .Lsub_return_nan_or_inf:
- slli a7, a2, 9
- bnez a7, .Lsub_return_nan
- leaf_return
- .Lsub_opposite_signs:
- /* Operand signs differ. Do an addition. */
- slli a7, a6, 8
- xor a3, a3, a7
- j .Ladd_same_sign
- .align 4
- .global __subsf3
- .type __subsf3, @function
- __subsf3:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- /* Check if the two operands have the same sign. */
- xor a7, a2, a3
- bltz a7, .Lsub_opposite_signs
- .Lsub_same_sign:
- /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
- ball a2, a6, .Lsub_xnan_or_inf
- ball a3, a6, .Lsub_ynan_or_inf
- /* Compare the operands. In contrast to addition, the entire
- value matters here. */
- extui a7, a2, 23, 8
- extui a8, a3, 23, 8
- bltu a2, a3, .Lsub_xsmaller
- .Lsub_ysmaller:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone a3, a6, .Lsub_yexpzero
- /* Replace y sign/exponent with 0x008. */
- or a3, a3, a6
- slli a3, a3, 8
- srli a3, a3, 8
- .Lsub_yexpdiff:
- /* Compute the exponent difference. */
- sub a10, a7, a8
- /* Exponent difference >= 32 -- just return the bigger value. */
- bgeui a10, 32, 1f
-
- /* Shift y right by the exponent difference. Any bits that are
- shifted out of y are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, a3, a9
- srl a3, a3
- sub a2, a2, a3
- /* Subtract the leftover bits in a9 from zero and propagate any
- borrow from a2. */
- neg a9, a9
- addi a10, a2, -1
- movnez a2, a10, a9
- /* Check if the subtract underflowed into the exponent. */
- extui a10, a2, 23, 8
- beq a10, a7, .Lsub_round
- j .Lsub_borrow
- .Lsub_yexpzero:
- /* Return zero if the inputs are equal. (For the non-subnormal
- case, subtracting the "1.0" will cause a borrow from the exponent
- and this case can be detected when handling the borrow.) */
- beq a2, a3, .Lsub_return_zero
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0". Unless x is also a subnormal, increment
- y's apparent exponent because subnormals behave as if they had
- the minimum (nonzero) exponent. */
- slli a3, a3, 9
- srli a3, a3, 9
- bnone a2, a6, .Lsub_yexpdiff
- addi a8, a8, 1
- j .Lsub_yexpdiff
- .Lsub_returny:
- /* Negate and return y. */
- slli a7, a6, 8
- xor a2, a3, a7
- 1: leaf_return
- .Lsub_xsmaller:
- /* Same thing as the "ysmaller" code, but with x and y swapped and
- with y negated. */
- bnone a2, a6, .Lsub_xexpzero
- or a2, a2, a6
- slli a2, a2, 8
- srli a2, a2, 8
- .Lsub_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Lsub_returny
-
- ssr a10
- movi a9, 0
- src a9, a2, a9
- srl a2, a2
- /* Negate y. */
- slli a11, a6, 8
- xor a3, a3, a11
- sub a2, a3, a2
- neg a9, a9
- addi a10, a2, -1
- movnez a2, a10, a9
- /* Check if the subtract underflowed into the exponent. */
- extui a10, a2, 23, 8
- bne a10, a8, .Lsub_borrow
- .Lsub_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi a2, a2, 1
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Lsub_exactlyhalf
- 1: leaf_return
- .Lsub_xexpzero:
- /* Same as "yexpzero". */
- beq a2, a3, .Lsub_return_zero
- slli a2, a2, 9
- srli a2, a2, 9
- bnone a3, a6, .Lsub_xexpdiff
- addi a7, a7, 1
- j .Lsub_xexpdiff
- .Lsub_return_zero:
- movi a2, 0
- leaf_return
- .Lsub_borrow:
- /* The subtraction has underflowed into the exponent field, so the
- value needs to be renormalized. Shift the mantissa left as
- needed to remove any leading zeros and adjust the exponent
- accordingly. If the exponent is not large enough to remove
- all the leading zeros, the result will be a subnormal value. */
- slli a8, a2, 9
- beqz a8, .Lsub_xzero
- do_nsau a6, a8, a7, a11
- srli a8, a8, 9
- bge a6, a10, .Lsub_subnormal
- addi a6, a6, 1
- .Lsub_normalize_shift:
- /* Shift the mantissa (a8/a9) left by a6. */
- ssl a6
- src a8, a8, a9
- sll a9, a9
- /* Combine the shifted mantissa with the sign and exponent,
- decrementing the exponent by a6. (The exponent has already
- been decremented by one due to the borrow from the subtraction,
- but adding the mantissa will increment the exponent by one.) */
- srli a2, a2, 23
- sub a2, a2, a6
- slli a2, a2, 23
- add a2, a2, a8
- j .Lsub_round
- .Lsub_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
- .Lsub_xzero:
- /* If there was a borrow from the exponent, and the mantissa and
- guard digits are all zero, then the inputs were equal and the
- result should be zero. */
- beqz a9, .Lsub_return_zero
- /* Only the guard digit is nonzero. Shift by min(24, a10). */
- addi a11, a10, -24
- movi a6, 24
- movltz a6, a10, a11
- j .Lsub_normalize_shift
- .Lsub_subnormal:
- /* The exponent is too small to shift away all the leading zeros.
- Set a6 to the current exponent (which has already been
- decremented by the borrow) so that the exponent of the result
- will be zero. Do not add 1 to a6 in this case, because: (1)
- adding the mantissa will not increment the exponent, so there is
- no need to subtract anything extra from the exponent to
- compensate, and (2) the effective exponent of a subnormal is 1
- not 0 so the shift amount must be 1 smaller than normal. */
- mov a6, a10
- j .Lsub_normalize_shift
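-
- /* A simplified C model of this renormalization step, assuming a
-    count-leading-zeros primitive (__builtin_clz); the real code also
-    folds the guard bits in a9 through the shift:
-
-        #include <stdint.h>
-
-        static void renormalize(uint32_t *mant, int32_t *exp)
-        {
-            // mant: 24-bit mantissa with its leading 1 lost to the
-            // borrow; exp: biased exponent after the borrow.
-            if (*mant == 0) { *exp = 0; return; }
-            int shift = __builtin_clz(*mant) - 8;  // zeros in the 24-bit field
-            if (shift > *exp)       // exponent range exhausted:
-                shift = *exp;       // the result becomes subnormal
-            *mant <<= shift;
-            *exp -= shift;
-        }
- */
-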
- #endif /* L_addsubsf3 */
- #ifdef L_mulsf3
- /* Multiplication */
- #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
- #define XCHAL_NO_MUL 1
- #endif
- .literal_position
- __mulsf3_aux:
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
- .Lmul_xexpzero:
- /* Clear the sign bit of x. */
- slli a2, a2, 1
- srli a2, a2, 1
- /* If x is zero, return zero. */
- beqz a2, .Lmul_return_zero
- /* Normalize x. Adjust the exponent in a8. */
- do_nsau a10, a2, a11, a12
- addi a10, a10, -8
- ssl a10
- sll a2, a2
- movi a8, 1
- sub a8, a8, a10
- j .Lmul_xnormalized
-
- .Lmul_yexpzero:
- /* Clear the sign bit of y. */
- slli a3, a3, 1
- srli a3, a3, 1
- /* If y is zero, return zero. */
- beqz a3, .Lmul_return_zero
- /* Normalize y. Adjust the exponent in a9. */
- do_nsau a10, a3, a11, a12
- addi a10, a10, -8
- ssl a10
- sll a3, a3
- movi a9, 1
- sub a9, a9, a10
- j .Lmul_ynormalized
- .Lmul_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- j .Lmul_done
- .Lmul_xnan_or_inf:
- /* If y is zero, return NaN. */
- slli a8, a3, 1
- beqz a8, .Lmul_return_nan
- /* If y is NaN, return y. */
- bnall a3, a6, .Lmul_returnx
- slli a8, a3, 9
- beqz a8, .Lmul_returnx
- .Lmul_returny:
- mov a2, a3
- .Lmul_returnx:
- slli a8, a2, 9
- bnez a8, .Lmul_return_nan
- /* Set the sign bit and return. */
- extui a7, a7, 31, 1
- slli a2, a2, 1
- ssai 1
- src a2, a7, a2
- j .Lmul_done
- .Lmul_ynan_or_inf:
- /* If x is zero, return NaN. */
- slli a8, a2, 1
- bnez a8, .Lmul_returny
- mov a2, a3
- .Lmul_return_nan:
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
- j .Lmul_done
- .align 4
- .global __mulsf3
- .type __mulsf3, @function
- __mulsf3:
- #if __XTENSA_CALL0_ABI__
- leaf_entry sp, 32
- addi sp, sp, -32
- s32i a12, sp, 16
- s32i a13, sp, 20
- s32i a14, sp, 24
- s32i a15, sp, 28
- #elif XCHAL_NO_MUL
- /* This is not really a leaf function; allocate enough stack space
- to allow CALL12s to a helper function. */
- leaf_entry sp, 64
- #else
- leaf_entry sp, 32
- #endif
- movi a6, 0x7f800000
- /* Get the sign of the result. */
- xor a7, a2, a3
- /* Check for NaN and infinity. */
- ball a2, a6, .Lmul_xnan_or_inf
- ball a3, a6, .Lmul_ynan_or_inf
- /* Extract the exponents. */
- extui a8, a2, 23, 8
- extui a9, a3, 23, 8
- beqz a8, .Lmul_xexpzero
- .Lmul_xnormalized:
- beqz a9, .Lmul_yexpzero
- .Lmul_ynormalized:
- /* Add the exponents. */
- add a8, a8, a9
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0xffffff
- or a2, a2, a6
- and a2, a2, a10
- or a3, a3, a6
- and a3, a3, a10
- /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
- #if XCHAL_HAVE_MUL32_HIGH
- mull a6, a2, a3
- muluh a2, a2, a3
- #else
- /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
- products. These partial products are:
- 0 xl * yl
- 1 xl * yh
- 2 xh * yl
- 3 xh * yh
- If using the Mul16 or Mul32 multiplier options, these input
- chunks must be stored in separate registers. For Mac16, the
- UMUL.AA.* opcodes can specify that the inputs come from either
- half of the registers, so there is no need to shift them out
- ahead of time. If there is no multiply hardware, the 16-bit
- chunks can be extracted when setting up the arguments to the
- separate multiply function. */
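-
- /* The decomposition in portable C, for reference (exact for the
-    full 32x32 -> 64-bit product; the helper name is illustrative):
-
-        #include <stdint.h>
-
-        static uint64_t umul32x32(uint32_t x, uint32_t y)
-        {
-            uint32_t xlo = x & 0xffff, xhi = x >> 16;
-            uint32_t ylo = y & 0xffff, yhi = y >> 16;
-            uint32_t pp0 = xlo * ylo;            // pp 0
-            uint32_t pp1 = xlo * yhi;            // pp 1
-            uint32_t pp2 = xhi * ylo;            // pp 2
-            uint32_t pp3 = xhi * yhi;            // pp 3
-            uint64_t mid = (uint64_t)pp1 + pp2;  // may carry past bit 31
-            return ((uint64_t)pp3 << 32) + (mid << 16) + pp0;
-        }
- */
-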
- #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* Calling a separate multiply function will clobber a0 and requires
- use of a8 as a temporary, so save those values now. (The function
- uses a custom ABI so nothing else needs to be saved.) */
- s32i a0, sp, 0
- s32i a8, sp, 4
- #endif
- #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
- #define a2h a4
- #define a3h a5
- /* Get the high halves of the inputs into registers. */
- srli a2h, a2, 16
- srli a3h, a3, 16
- #define a2l a2
- #define a3l a3
- #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
- /* Clear the high halves of the inputs. This does not matter
- for MUL16 because the high bits are ignored. */
- extui a2, a2, 0, 16
- extui a3, a3, 0, 16
- #endif
- #endif /* MUL16 || MUL32 */
- #if XCHAL_HAVE_MUL16
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mul16u dst, xreg ## xhalf, yreg ## yhalf
- #elif XCHAL_HAVE_MUL32
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mull dst, xreg ## xhalf, yreg ## yhalf
- #elif XCHAL_HAVE_MAC16
- /* The preprocessor insists on inserting a space when concatenating after
- a period in the definition of do_mul below. These macros are a workaround
- using underscores instead of periods when doing the concatenation. */
- #define umul_aa_ll umul.aa.ll
- #define umul_aa_lh umul.aa.lh
- #define umul_aa_hl umul.aa.hl
- #define umul_aa_hh umul.aa.hh
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- umul_aa_ ## xhalf ## yhalf xreg, yreg; \
- rsr dst, ACCLO
- #else /* no multiply hardware */
-
- #define set_arg_l(dst, src) \
- extui dst, src, 0, 16
- #define set_arg_h(dst, src) \
- srli dst, src, 16
- #if __XTENSA_CALL0_ABI__
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a13, xreg); \
- set_arg_ ## yhalf (a14, yreg); \
- call0 .Lmul_mulsi3; \
- mov dst, a12
- #else
- #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a14, xreg); \
- set_arg_ ## yhalf (a15, yreg); \
- call12 .Lmul_mulsi3; \
- mov dst, a14
- #endif /* __XTENSA_CALL0_ABI__ */
- #endif /* no multiply hardware */
- /* Add pp1 and pp2 into a6 with carry-out in a9. */
- do_mul(a6, a2, l, a3, h) /* pp 1 */
- do_mul(a11, a2, h, a3, l) /* pp 2 */
- movi a9, 0
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
- 1:
- /* Shift the high half of a9/a6 into position in a9. Note that
- this value can be safely incremented without any carry-outs. */
- ssai 16
- src a9, a9, a6
- /* Compute the low word into a6. */
- do_mul(a11, a2, l, a3, l) /* pp 0 */
- sll a6, a6
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
- 1:
- /* Compute the high word into a2. */
- do_mul(a2, a2, h, a3, h) /* pp 3 */
- add a2, a2, a9
-
- #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* Restore values saved on the stack during the multiplication. */
- l32i a0, sp, 0
- l32i a8, sp, 4
- #endif
- #endif /* ! XCHAL_HAVE_MUL32_HIGH */
- /* Shift left by 9 bits, unless there was a carry-out from the
- multiply, in which case, shift by 8 bits and increment the
- exponent. */
- movi a4, 9
- srli a5, a2, 24 - 9
- beqz a5, 1f
- addi a4, a4, -1
- addi a8, a8, 1
- 1: ssl a4
- src a2, a2, a6
- sll a6, a6
- /* Subtract the extra bias from the exponent sum (plus one to account
- for the explicit "1.0" of the mantissa that will be added to the
- exponent in the final result). */
- movi a4, 0x80
- sub a8, a8, a4
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..fd are OK here. */
- movi a4, 0xfe
- bgeu a8, a4, .Lmul_overflow
-
- .Lmul_round:
- /* Round. */
- bgez a6, .Lmul_rounded
- addi a2, a2, 1
- slli a6, a6, 1
- beqz a6, .Lmul_exactlyhalf
- .Lmul_rounded:
- /* Add the exponent to the mantissa. */
- slli a8, a8, 23
- add a2, a2, a8
- .Lmul_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or a2, a2, a7
- .Lmul_done:
- #if __XTENSA_CALL0_ABI__
- l32i a12, sp, 16
- l32i a13, sp, 20
- l32i a14, sp, 24
- l32i a15, sp, 28
- addi sp, sp, 32
- #endif
- leaf_return
- .Lmul_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- j .Lmul_rounded
- .Lmul_overflow:
- bltz a8, .Lmul_underflow
- /* Return +/- Infinity. */
- movi a8, 0xff
- slli a2, a8, 23
- j .Lmul_addsign
- .Lmul_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- mov a9, a6
- ssr a8
- bgeui a8, 32, .Lmul_flush_to_zero
-
- /* Shift a2 right. Any bits that are shifted out of a2 are saved
- in a6 (combined with the shifted-out bits currently in a6) for
- rounding the result. */
- sll a6, a2
- srl a2, a2
- /* Set the exponent to zero. */
- movi a8, 0
- /* Pack any nonzero bits shifted out into a6. */
- beqz a9, .Lmul_round
- movi a9, 1
- or a6, a6, a9
- j .Lmul_round
-
- .Lmul_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- j .Lmul_done
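-
- /* A C model of the underflow path above: shift the mantissa right
-    until the exponent field is zero, keeping every discarded bit as
-    sticky information for the rounding step (names illustrative;
-    'exp' is the biased result exponent, <= 0 on this path):
-
-        #include <stdint.h>
-
-        static uint32_t make_subnormal(uint32_t mant, int exp, uint32_t *round)
-        {
-            int shift = 1 - exp;       // subnormals have effective exponent 1
-            if (shift >= 32) {         // everything would be discarded:
-                *round = 0;            //   flush to zero, as the code above does
-                return 0;
-            }
-            uint32_t lost = mant << (32 - shift);  // newly shifted-out bits
-            if (*round != 0)
-                lost |= 1;             // fold older guard bits into a sticky bit
-            *round = lost;
-            return mant >> shift;      // exponent field is now zero
-        }
- */
-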
- #if XCHAL_NO_MUL
-
- /* For Xtensa processors with no multiply hardware, this simplified
- version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. When using CALL0, this function
- uses a custom ABI: the inputs are passed in a13 and a14, the
- result is returned in a12, and a8 and a15 are clobbered. */
- .align 4
- .Lmul_mulsi3:
- leaf_entry sp, 16
- .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
- movi \dst, 0
- 1: add \tmp1, \src2, \dst
- extui \tmp2, \src1, 0, 1
- movnez \dst, \tmp1, \tmp2
- do_addx2 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 1, 1
- movnez \dst, \tmp1, \tmp2
- do_addx4 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 2, 1
- movnez \dst, \tmp1, \tmp2
- do_addx8 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 3, 1
- movnez \dst, \tmp1, \tmp2
- srli \src1, \src1, 4
- slli \src2, \src2, 4
- bnez \src1, 1b
- .endm
- #if __XTENSA_CALL0_ABI__
- mul_mulsi3_body a12, a13, a14, a15, a8
- #else
- /* The result will be written into a2, so save that argument in a4. */
- mov a4, a2
- mul_mulsi3_body a2, a4, a3, a5, a6
- #endif
- leaf_return
- #endif /* XCHAL_NO_MUL */
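-
- /* The macro above retires four multiplier bits per iteration with
-    conditional shifted adds. A C model (name illustrative):
-
-        #include <stdint.h>
-
-        static uint32_t mulsi3(uint32_t a, uint32_t b)
-        {
-            uint32_t dst = 0;
-            do {
-                if (a & 1) dst += b;        // bit 0
-                if (a & 2) dst += b << 1;   // bit 1 (addx2)
-                if (a & 4) dst += b << 2;   // bit 2 (addx4)
-                if (a & 8) dst += b << 3;   // bit 3 (addx8)
-                a >>= 4;
-                b <<= 4;
-            } while (a != 0);
-            return dst;
-        }
- */
-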
- #endif /* L_mulsf3 */
- #ifdef L_divsf3
- /* Division */
- #if XCHAL_HAVE_FP_DIV
- .align 4
- .global __divsf3
- .type __divsf3, @function
- __divsf3:
- leaf_entry sp, 16
- wfr f1, a2 /* dividend */
- wfr f2, a3 /* divisor */
- div0.s f3, f2
- nexp01.s f4, f2
- const.s f5, 1
- maddn.s f5, f4, f3
- mov.s f6, f3
- mov.s f7, f2
- nexp01.s f2, f1
- maddn.s f6, f5, f6
- const.s f5, 1
- const.s f0, 0
- neg.s f8, f2
- maddn.s f5, f4, f6
- maddn.s f0, f8, f3
- mkdadj.s f7, f1
- maddn.s f6, f5, f6
- maddn.s f8, f4, f0
- const.s f3, 1
- maddn.s f3, f4, f6
- maddn.s f0, f8, f6
- neg.s f2, f2
- maddn.s f6, f3, f6
- maddn.s f2, f4, f0
- addexpm.s f0, f7
- addexp.s f6, f7
- divn.s f0, f2, f6
- rfr a2, f0
- leaf_return
- #else
- .literal_position
- __divsf3_aux:
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
- .Ldiv_yexpzero:
- /* Clear the sign bit of y. */
- slli a3, a3, 1
- srli a3, a3, 1
- /* Check for division by zero. */
- beqz a3, .Ldiv_yzero
- /* Normalize y. Adjust the exponent in a9. */
- do_nsau a10, a3, a4, a5
- addi a10, a10, -8
- ssl a10
- sll a3, a3
- movi a9, 1
- sub a9, a9, a10
- j .Ldiv_ynormalized
- .Ldiv_yzero:
- /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
- slli a4, a2, 1
- srli a4, a4, 1
- srli a2, a7, 31
- slli a2, a2, 31
- or a2, a2, a6
- bnez a4, 1f
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
- 1: leaf_return
- .Ldiv_xexpzero:
- /* Clear the sign bit of x. */
- slli a2, a2, 1
- srli a2, a2, 1
- /* If x is zero, return zero. */
- beqz a2, .Ldiv_return_zero
- /* Normalize x. Adjust the exponent in a8. */
- do_nsau a10, a2, a4, a5
- addi a10, a10, -8
- ssl a10
- sll a2, a2
- movi a8, 1
- sub a8, a8, a10
- j .Ldiv_xnormalized
-
- .Ldiv_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- leaf_return
- .Ldiv_xnan_or_inf:
- /* Set the sign bit of the result. */
- srli a7, a3, 31
- slli a7, a7, 31
- xor a2, a2, a7
- /* If y is NaN or Inf, return NaN. */
- ball a3, a6, .Ldiv_return_nan
- slli a7, a2, 9
- bnez a7, .Ldiv_return_nan
- leaf_return
- .Ldiv_ynan_or_inf:
- /* If y is Infinity, return zero. */
- slli a8, a3, 9
- beqz a8, .Ldiv_return_zero
- /* y is NaN; return it. */
- mov a2, a3
- .Ldiv_return_nan:
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
- leaf_return
- .align 4
- .global __divsf3
- .type __divsf3, @function
- __divsf3:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- /* Get the sign of the result. */
- xor a7, a2, a3
- /* Check for NaN and infinity. */
- ball a2, a6, .Ldiv_xnan_or_inf
- ball a3, a6, .Ldiv_ynan_or_inf
- /* Extract the exponents. */
- extui a8, a2, 23, 8
- extui a9, a3, 23, 8
- beqz a9, .Ldiv_yexpzero
- .Ldiv_ynormalized:
- beqz a8, .Ldiv_xexpzero
- .Ldiv_xnormalized:
- /* Subtract the exponents. */
- sub a8, a8, a9
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0xffffff
- or a2, a2, a6
- and a2, a2, a10
- or a3, a3, a6
- and a3, a3, a10
- /* The first digit of the mantissa division must be a one.
- Shift x (and adjust the exponent) as needed to make this true. */
- bltu a3, a2, 1f
- slli a2, a2, 1
- addi a8, a8, -1
- 1:
- /* Do the first subtraction and shift. */
- sub a2, a2, a3
- slli a2, a2, 1
- /* Put the quotient into a10. */
- movi a10, 1
- /* Divide one bit at a time for 23 bits. */
- movi a9, 23
- #if XCHAL_HAVE_LOOPS
- loop a9, .Ldiv_loopend
- #endif
- .Ldiv_loop:
- /* Shift the quotient << 1. */
- slli a10, a10, 1
- /* Is this digit a 0 or 1? */
- bltu a2, a3, 1f
- /* Output a 1 and subtract. */
- addi a10, a10, 1
- sub a2, a2, a3
- /* Shift the dividend << 1. */
- 1: slli a2, a2, 1
- #if !XCHAL_HAVE_LOOPS
- addi a9, a9, -1
- bnez a9, .Ldiv_loop
- #endif
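-
- /* A C model of this quotient loop (names illustrative): x and y are
-    the mantissas with their explicit "1.0" bits set, and x has been
-    shifted so that x >= y; the model repeats the first subtraction
-    and shift done above before entering the loop:
-
-        #include <stdint.h>
-
-        static uint32_t divide_mantissa(uint32_t x, uint32_t y, uint32_t *rem)
-        {
-            uint32_t q = 1;
-            x = (x - y) << 1;          // the first digit is always 1
-            for (int i = 0; i < 23; ++i) {
-                q <<= 1;
-                if (x >= y) {          // next quotient digit is 1
-                    q += 1;
-                    x -= y;
-                }
-                x <<= 1;               // shift the dividend
-            }
-            *rem = x;                  // remainder << 1, used for rounding
-            return q;                  // 24-bit quotient
-        }
- */
-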
- .Ldiv_loopend:
- /* Add the exponent bias (less one to account for the explicit "1.0"
- of the mantissa that will be added to the exponent in the final
- result). */
- addi a8, a8, 0x7e
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..fd are OK here. */
- movi a4, 0xfe
- bgeu a8, a4, .Ldiv_overflow
-
- .Ldiv_round:
- /* Round. The remainder (<< 1) is in a2. */
- bltu a2, a3, .Ldiv_rounded
- addi a10, a10, 1
- beq a2, a3, .Ldiv_exactlyhalf
- .Ldiv_rounded:
- /* Add the exponent to the mantissa. */
- slli a8, a8, 23
- add a2, a10, a8
- .Ldiv_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or a2, a2, a7
- leaf_return
- .Ldiv_overflow:
- bltz a8, .Ldiv_underflow
- /* Return +/- Infinity. */
- addi a8, a4, 1 /* 0xff */
- slli a2, a8, 23
- j .Ldiv_addsign
- .Ldiv_exactlyhalf:
- /* Remainder is exactly half the divisor. Round even. */
- srli a10, a10, 1
- slli a10, a10, 1
- j .Ldiv_rounded
- .Ldiv_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- ssr a8
- bgeui a8, 32, .Ldiv_flush_to_zero
-
- /* Shift a10 right. Any bits that are shifted out of a10 are
- saved in a6 for rounding the result. */
- sll a6, a10
- srl a10, a10
- /* Set the exponent to zero. */
- movi a8, 0
- /* Pack any nonzero remainder (in a2) into a6. */
- beqz a2, 1f
- movi a9, 1
- or a6, a6, a9
-
- /* Round a10 based on the bits shifted out into a6. */
- 1: bgez a6, .Ldiv_rounded
- addi a10, a10, 1
- slli a6, a6, 1
- bnez a6, .Ldiv_rounded
- srli a10, a10, 1
- slli a10, a10, 1
- j .Ldiv_rounded
- .Ldiv_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- leaf_return
- #endif /* XCHAL_HAVE_FP_DIV */
- #endif /* L_divsf3 */
- #ifdef L_cmpsf2
- /* Equal and Not Equal */
- .align 4
- .global __eqsf2
- .global __nesf2
- .set __nesf2, __eqsf2
- .type __eqsf2, @function
- __eqsf2:
- leaf_entry sp, 16
- bne a2, a3, 4f
- /* The values are equal but NaN != NaN. Check the exponent. */
- movi a6, 0x7f800000
- ball a2, a6, 3f
- /* Equal. */
- movi a2, 0
- leaf_return
- /* Not equal. */
- 2: movi a2, 1
- leaf_return
- /* Check if the mantissas are nonzero. */
- 3: slli a7, a2, 9
- j 5f
- /* Check if x and y are zero with different signs. */
- 4: or a7, a2, a3
- slli a7, a7, 1
- /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
- of x when exponent(x) = 0x7f8 and x == y. */
- 5: movi a2, 0
- movi a3, 1
- movnez a2, a3, a7
- leaf_return
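-
- /* A C model of the result convention (zero means equal; the name is
-    illustrative): +0 and -0 compare equal, and a NaN is unequal to
-    everything, itself included.
-
-        #include <stdint.h>
-        #include <string.h>
-
-        static int eqsf2(float x, float y)
-        {
-            uint32_t a, b;
-            memcpy(&a, &x, sizeof a);
-            memcpy(&b, &y, sizeof b);
-            if (a != b)                          // equal only if both are zeros
-                return ((a | b) << 1) != 0;
-            if ((a & 0x7f800000u) == 0x7f800000u)
-                return (a << 9) != 0;            // identical NaNs still differ
-            return 0;
-        }
- */
-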
- /* Greater Than */
- .align 4
- .global __gtsf2
- .type __gtsf2, @function
- __gtsf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
- 1: bnall a3, a6, .Lle_cmp
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Lle_cmp
- movi a2, 0
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 0
- leaf_return
- /* Less Than or Equal */
- .align 4
- .global __lesf2
- .type __lesf2, @function
- __lesf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
- 1: bnall a3, a6, .Lle_cmp
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Lle_cmp
- movi a2, 1
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 1
- leaf_return
- .Lle_cmp:
- /* Check if x and y have different signs. */
- xor a7, a2, a3
- bltz a7, .Lle_diff_signs
- /* Check if x is negative. */
- bltz a2, .Lle_xneg
- /* Check if x <= y. */
- bltu a3, a2, 5f
- 4: movi a2, 0
- leaf_return
- .Lle_xneg:
- /* Check if y <= x. */
- bgeu a2, a3, 4b
- 5: movi a2, 1
- leaf_return
- .Lle_diff_signs:
- bltz a2, 4b
- /* Check if both x and y are zero. */
- or a7, a2, a3
- slli a7, a7, 1
- movi a2, 1
- movi a3, 0
- moveqz a2, a3, a7
- leaf_return
- /* Greater Than or Equal */
- .align 4
- .global __gesf2
- .type __gesf2, @function
- __gesf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
- 1: bnall a3, a6, .Llt_cmp
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Llt_cmp
- movi a2, -1
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, -1
- leaf_return
- /* Less Than */
- .align 4
- .global __ltsf2
- .type __ltsf2, @function
- __ltsf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
- 1: bnall a3, a6, .Llt_cmp
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Llt_cmp
- movi a2, 0
- leaf_return
- /* Check if x is a NaN. */
- 2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 0
- leaf_return
- .Llt_cmp:
- /* Check if x and y have different signs. */
- xor a7, a2, a3
- bltz a7, .Llt_diff_signs
- /* Check if x is negative. */
- bltz a2, .Llt_xneg
- /* Check if x < y. */
- bgeu a2, a3, 5f
- 4: movi a2, -1
- leaf_return
- .Llt_xneg:
- /* Check if y < x. */
- bltu a3, a2, 4b
- 5: movi a2, 0
- leaf_return
- .Llt_diff_signs:
- bgez a2, 5b
- /* Check if both x and y are zero. */
- or a7, a2, a3
- slli a7, a7, 1
- movi a2, 0
- movi a3, -1
- movnez a2, a3, a7
- leaf_return
- /* Unordered */
- .align 4
- .global __unordsf2
- .type __unordsf2, @function
- __unordsf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 3f
- 1: ball a3, a6, 4f
- 2: movi a2, 0
- leaf_return
- 3: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 1
- leaf_return
- 4: slli a7, a3, 9
- beqz a7, 2b
- movi a2, 1
- leaf_return
- #endif /* L_cmpsf2 */
- #ifdef L_fixsfsi
- .align 4
- .global __fixsfsi
- .type __fixsfsi, @function
- __fixsfsi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixsfsi_nan_or_inf
- /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7e
- bgei a4, 32, .Lfixsfsi_maxint
- blti a4, 1, .Lfixsfsi_zero
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli a5, a7, 8
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
- .Lfixsfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixsfsi_maxint
- /* Translate NaN to +maxint. */
- movi a2, 0
- .Lfixsfsi_maxint:
- slli a4, a6, 8 /* 0x80000000 */
- addi a5, a4, -1 /* 0x7fffffff */
- movgez a4, a5, a2
- mov a2, a4
- leaf_return
- .Lfixsfsi_zero:
- movi a2, 0
- leaf_return
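-
- /* A C model of __fixsfsi (truncate toward zero; NaN becomes +maxint,
-    out-of-range values saturate), for reference:
-
-        #include <stdint.h>
-        #include <string.h>
-
-        static int32_t fixsfsi(float f)
-        {
-            uint32_t u;
-            memcpy(&u, &f, sizeof u);
-            if ((u & 0x7f800000u) == 0x7f800000u && (u << 9) != 0)
-                return INT32_MAX;                  // NaN -> +maxint
-            int32_t exp = (int32_t)((u >> 23) & 0xff) - 0x7e;
-            if (exp >= 32)                         // Inf or too large
-                return (int32_t)u < 0 ? INT32_MIN : INT32_MAX;
-            if (exp < 1)                           // |f| < 1 truncates to 0
-                return 0;
-            uint32_t v = ((u | 0x00800000u) << 8) >> (32 - exp);
-            return (int32_t)u < 0 ? -(int32_t)v : (int32_t)v;
-        }
- */
-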
- #endif /* L_fixsfsi */
- #ifdef L_fixsfdi
- .align 4
- .global __fixsfdi
- .type __fixsfdi, @function
- __fixsfdi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixsfdi_nan_or_inf
- /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7e
- bgei a4, 64, .Lfixsfdi_maxint
- blti a4, 1, .Lfixsfdi_zero
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli xh, a7, 8
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixsfdi_smallshift
- srl xl, xh
- movi xh, 0
- .Lfixsfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
- 1: leaf_return
- .Lfixsfdi_smallshift:
- movi xl, 0
- sll xl, xh
- srl xh, xh
- j .Lfixsfdi_shifted
- .Lfixsfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixsfdi_maxint
- /* Translate NaN to +maxint. */
- movi a2, 0
- .Lfixsfdi_maxint:
- slli a7, a6, 8 /* 0x80000000 */
- bgez a2, 1f
- mov xh, a7
- movi xl, 0
- leaf_return
- 1: addi xh, a7, -1 /* 0x7fffffff */
- movi xl, -1
- leaf_return
- .Lfixsfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
- #endif /* L_fixsfdi */
- #ifdef L_fixunssfsi
- .align 4
- .global __fixunssfsi
- .type __fixunssfsi, @function
- __fixunssfsi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixunssfsi_nan_or_inf
- /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7f
- bgei a4, 32, .Lfixunssfsi_maxint
- bltz a4, .Lfixunssfsi_zero
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli a5, a7, 8
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 32, .Lfixunssfsi_bigexp
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
- .Lfixunssfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixunssfsi_maxint
- /* Translate NaN to 0xffffffff. */
- movi a2, -1
- leaf_return
- .Lfixunssfsi_maxint:
- slli a4, a6, 8 /* 0x80000000 */
- movi a5, -1 /* 0xffffffff */
- movgez a4, a5, a2
- mov a2, a4
- leaf_return
- .Lfixunssfsi_zero:
- movi a2, 0
- leaf_return
- .Lfixunssfsi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz a2, 1f
- mov a2, a5 /* no shift needed */
- leaf_return
- /* Return 0x80000000 if negative. */
- 1: slli a2, a6, 8
- leaf_return
- #endif /* L_fixunssfsi */
- #ifdef L_fixunssfdi
- .align 4
- .global __fixunssfdi
- .type __fixunssfdi, @function
- __fixunssfdi:
- leaf_entry sp, 16
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixunssfdi_nan_or_inf
- /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7f
- bgei a4, 64, .Lfixunssfdi_maxint
- bltz a4, .Lfixunssfdi_zero
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli xh, a7, 8
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 64, .Lfixunssfdi_bigexp
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixunssfdi_smallshift
- srl xl, xh
- movi xh, 0
- .Lfixunssfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
- 1: leaf_return
- .Lfixunssfdi_smallshift:
- movi xl, 0
- src xl, xh, xl
- srl xh, xh
- j .Lfixunssfdi_shifted
- .Lfixunssfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixunssfdi_maxint
- /* Translate NaN to 0xffffffff.... */
- 1: movi xh, -1
- movi xl, -1
- leaf_return
- .Lfixunssfdi_maxint:
- bgez a2, 1b
- 2: slli xh, a6, 8 /* 0x80000000 */
- movi xl, 0
- leaf_return
- .Lfixunssfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
- .Lfixunssfdi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz a7, 2b
- movi xl, 0
- leaf_return /* no shift needed */
- #endif /* L_fixunssfdi */
- #ifdef L_floatsisf
- .align 4
- .global __floatunsisf
- .type __floatunsisf, @function
- __floatunsisf:
- leaf_entry sp, 16
- beqz a2, .Lfloatsisf_return
- /* Set the sign to zero and jump to the floatsisf code. */
- movi a7, 0
- j .Lfloatsisf_normalize
- .align 4
- .global __floatsisf
- .type __floatsisf, @function
- __floatsisf:
- leaf_entry sp, 16
- /* Check for zero. */
- beqz a2, .Lfloatsisf_return
- /* Save the sign. */
- extui a7, a2, 31, 1
- /* Get the absolute value. */
- #if XCHAL_HAVE_ABS
- abs a2, a2
- #else
- neg a4, a2
- movltz a2, a4, a2
- #endif
- .Lfloatsisf_normalize:
- /* Normalize with the first 1 bit in the msb. */
- do_nsau a4, a2, a5, a6
- ssl a4
- sll a5, a2
- /* Shift the mantissa into position, with rounding bits in a6. */
- srli a2, a5, 8
- slli a6, a5, (32 - 8)
- /* Set the exponent. */
- movi a5, 0x9d /* 0x7e + 31 */
- sub a5, a5, a4
- slli a5, a5, 23
- add a2, a2, a5
- /* Add the sign. */
- slli a7, a7, 31
- or a2, a2, a7
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a6, .Lfloatsisf_return
- addi a2, a2, 1 /* Overflow to the exponent is OK. */
- /* Check if the leftover fraction is exactly 1/2. */
- slli a6, a6, 1
- beqz a6, .Lfloatsisf_exactlyhalf
- .Lfloatsisf_return:
- leaf_return
- .Lfloatsisf_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
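-
- /* A C model of __floatsisf, assuming __builtin_clz (the function
-    name is illustrative):
-
-        #include <stdint.h>
-        #include <string.h>
-
-        static float floatsisf(int32_t i)
-        {
-            float f;
-            uint32_t r = 0;
-            if (i != 0) {
-                uint32_t sign = (uint32_t)i & 0x80000000u;
-                uint32_t a = sign ? -(uint32_t)i : (uint32_t)i;
-                int n = __builtin_clz(a);
-                a <<= n;                            // leading 1 up to bit 31
-                uint32_t frac = a << 24;            // bits below the mantissa
-                r = (a >> 8)                        // mantissa, bit 23 set
-                    + ((uint32_t)(0x9d - n) << 23)  // exponent, 0x7e + 31 - n
-                    + sign;
-                if (frac & 0x80000000u) {           // fraction >= 1/2: round up
-                    r += 1;                         // may carry into the exponent
-                    if ((frac << 1) == 0)           // a tie: round to even
-                        r &= ~1u;
-                }
-            }
-            memcpy(&f, &r, sizeof f);
-            return f;
-        }
- */
-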
- #endif /* L_floatsisf */
- #ifdef L_floatdisf
- .align 4
- .global __floatundisf
- .type __floatundisf, @function
- __floatundisf:
- leaf_entry sp, 16
- /* Check for zero. */
- or a4, xh, xl
- beqz a4, 2f
- /* Set the sign to zero and jump to the floatdisf code. */
- movi a7, 0
- j .Lfloatdisf_normalize
- .align 4
- .global __floatdisf
- .type __floatdisf, @function
- __floatdisf:
- leaf_entry sp, 16
- /* Check for zero. */
- or a4, xh, xl
- beqz a4, 2f
- /* Save the sign. */
- extui a7, xh, 31, 1
- /* Get the absolute value. */
- bgez xh, .Lfloatdisf_normalize
- neg xl, xl
- neg xh, xh
- beqz xl, .Lfloatdisf_normalize
- addi xh, xh, -1
- .Lfloatdisf_normalize:
- /* Normalize with the first 1 bit in the msb of xh. */
- beqz xh, .Lfloatdisf_bigshift
- do_nsau a4, xh, a5, a6
- ssl a4
- src xh, xh, xl
- sll xl, xl
- .Lfloatdisf_shifted:
- /* Shift the mantissa into position, with rounding bits in a6. */
- ssai 8
- sll a5, xl
- src a6, xh, xl
- srl xh, xh
- beqz a5, 1f
- movi a5, 1
- or a6, a6, a5
- 1:
- /* Set the exponent. */
- movi a5, 0xbd /* 0x7e + 63 */
- sub a5, a5, a4
- slli a5, a5, 23
- add a2, xh, a5
- /* Add the sign. */
- slli a7, a7, 31
- or a2, a2, a7
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a6, 2f
- addi a2, a2, 1 /* Overflow to the exponent is OK. */
- /* Check if the leftover fraction is exactly 1/2. */
- slli a6, a6, 1
- beqz a6, .Lfloatdisf_exactlyhalf
- 2: leaf_return
- .Lfloatdisf_bigshift:
- /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
- do_nsau a4, xl, a5, a6
- ssl a4
- sll xh, xl
- movi xl, 0
- addi a4, a4, 32
- j .Lfloatdisf_shifted
- .Lfloatdisf_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
- #endif /* L_floatdisf */
- #if XCHAL_HAVE_FP_SQRT
- #ifdef L_sqrtf
- /* Square root */
- .align 4
- .global __ieee754_sqrtf
- .type __ieee754_sqrtf, @function
- __ieee754_sqrtf:
- leaf_entry sp, 16
- wfr f1, a2
- sqrt0.s f2, f1
- const.s f3, 0
- maddn.s f3, f2, f2
- nexp01.s f4, f1
- const.s f0, 3
- addexp.s f4, f0
- maddn.s f0, f3, f4
- nexp01.s f3, f1
- neg.s f5, f3
- maddn.s f2, f0, f2
- const.s f0, 0
- const.s f6, 0
- const.s f7, 0
- maddn.s f0, f5, f2
- maddn.s f6, f2, f4
- const.s f4, 3
- maddn.s f7, f4, f2
- maddn.s f3, f0, f0
- maddn.s f4, f6, f2
- neg.s f2, f7
- maddn.s f0, f3, f2
- maddn.s f7, f4, f7
- mksadj.s f2, f1
- nexp01.s f1, f1
- maddn.s f1, f0, f0
- neg.s f3, f7
- addexpm.s f0, f2
- addexp.s f3, f2
- divn.s f0, f1, f3
- rfr a2, f0
- leaf_return
- #endif /* L_sqrtf */
- #endif /* XCHAL_HAVE_FP_SQRT */
- #if XCHAL_HAVE_FP_RECIP
- #ifdef L_recipsf2
- /* Reciprocal */
- .align 4
- .global __recipsf2
- .type __recipsf2, @function
- __recipsf2:
- leaf_entry sp, 16
- wfr f1, a2
- recip0.s f0, f1
- const.s f2, 1
- msub.s f2, f1, f0
- maddn.s f0, f0, f2
- const.s f2, 1
- msub.s f2, f1, f0
- maddn.s f0, f0, f2
- rfr a2, f0
- leaf_return
- #endif /* L_recipsf2 */
- #endif /* XCHAL_HAVE_FP_RECIP */
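-
- /* The sequence above implements Newton-Raphson refinement of the
-    hardware's initial estimate (recip0.s). Ignoring the fused
-    multiply-add details, each step behaves like this C sketch:
-
-        static float recip_refine(float a, float r)
-        {
-            for (int i = 0; i < 2; ++i) {   // two steps, as above
-                float e = 1.0f - a * r;     // residual error
-                r = r + r * e;              // roughly doubles the correct bits
-            }
-            return r;
-        }
- */
-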
- #if XCHAL_HAVE_FP_RSQRT
- #ifdef L_rsqrtsf2
- /* Reciprocal square root */
- .align 4
- .global __rsqrtsf2
- .type __rsqrtsf2, @function
- __rsqrtsf2:
- leaf_entry sp, 16
- wfr f1, a2
- rsqrt0.s f0, f1
- mul.s f2, f1, f0
- const.s f3, 3
- mul.s f4, f3, f0
- const.s f5, 1
- msub.s f5, f2, f0
- maddn.s f0, f4, f5
- mul.s f2, f1, f0
- mul.s f1, f3, f0
- const.s f3, 1
- msub.s f3, f2, f0
- maddn.s f0, f1, f3
- rfr a2, f0
- leaf_return
- #endif /* L_rsqrtsf2 */
- #endif /* XCHAL_HAVE_FP_RSQRT */