12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315 |
- /* -*- Mode: Asm -*- */
- /* Copyright (C) 1998-2022 Free Software Foundation, Inc.
- Contributed by Denis Chertykov <chertykov@gmail.com>
- This file is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 3, or (at your option) any
- later version.
- This file is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
- ;; Fixed-register and I/O-address definitions.
- ;; On AVR-Tiny cores r0/r1 do not exist, so the zero/temp registers
- ;; live in r17/r16 instead (per the avr-gcc AVR-Tiny register convention).
- #if defined (__AVR_TINY__)
- #define __zero_reg__ r17
- #define __tmp_reg__ r16
- #else
- #define __zero_reg__ r1
- #define __tmp_reg__ r0
- #endif
- ;; I/O addresses (for use with IN/OUT) of SREG, stack pointer, RAMPZ, EIND.
- #define __SREG__ 0x3f
- #if defined (__AVR_HAVE_SPH__)
- #define __SP_H__ 0x3e
- #endif
- #define __SP_L__ 0x3d
- #define __RAMPZ__ 0x3B
- #define __EIND__ 0x3C
- /* Most of the functions here are called directly from avr.md
- patterns, instead of using the standard libcall mechanisms.
- This can make better code because GCC knows exactly which
- of the call-used registers (not all of them) are clobbered. */
- /* FIXME: At present, there is no SORT directive in the linker
- script so that we must not assume that different modules
- in the same input section like .libgcc.text.mul will be
- located close together. Therefore, we cannot use
- RCALL/RJMP to call a function like __udivmodhi4 from
- __divmodhi4 and have to use lengthy XCALL/XJMP even
- though they are in the same input section and all same
- input sections together are small enough to reach every
- location with a RCALL/RJMP instruction. */
- ;; EIJMP/EICALL without ELPMX is not a configuration this file handles.
- #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
- #error device not supported
- #endif
- ;; mov_l/mov_h: copy the low/high byte of a 16-bit pair.  On cores with
- ;; MOVW, mov_l moves both bytes at once and mov_h expands to nothing.
- .macro mov_l r_dest, r_src
- #if defined (__AVR_HAVE_MOVW__)
- movw \r_dest, \r_src
- #else
- mov \r_dest, \r_src
- #endif
- .endm
- .macro mov_h r_dest, r_src
- #if defined (__AVR_HAVE_MOVW__)
- ; empty: mov_l already copied both bytes via MOVW
- #else
- mov \r_dest, \r_src
- #endif
- .endm
- ;; wmov: copy a 16-bit register pair; r_dest/r_src are even register numbers.
- .macro wmov r_dest, r_src
- #if defined (__AVR_HAVE_MOVW__)
- movw \r_dest, \r_src
- #else
- mov \r_dest, \r_src
- mov \r_dest+1, \r_src+1
- #endif
- .endm
- ;; XCALL/XJMP: longest-reach direct call/jump available on the core.
- #if defined (__AVR_HAVE_JMP_CALL__)
- #define XCALL call
- #define XJMP jmp
- #else
- #define XCALL rcall
- #define XJMP rjmp
- #endif
- ;; XICALL/XIJMP: indirect call/jump through Z (EIND-extended if available).
- #if defined (__AVR_HAVE_EIJMP_EICALL__)
- #define XICALL eicall
- #define XIJMP eijmp
- #else
- #define XICALL icall
- #define XIJMP ijmp
- #endif
- ;; Prologue stuff
- ;; Tail-jump into __prologue_saves__ so that exactly n_pushed registers
- ;; (r28..r17 downwards, 2 bytes of code per register) are pushed; X holds
- ;; the frame size and Z the return point inside this function.
- .macro do_prologue_saves n_pushed n_frame=0
- ldi r26, lo8(\n_frame)
- ldi r27, hi8(\n_frame)
- ldi r30, lo8(gs(.L_prologue_saves.\@))
- ldi r31, hi8(gs(.L_prologue_saves.\@))
- XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
- .L_prologue_saves.\@:
- .endm
- ;; Epilogue stuff
- ;; Deallocate n_frame bytes of frame, then tail-jump into
- ;; __epilogue_restores__ to pop n_pushed saved registers and return.
- .macro do_epilogue_restores n_pushed n_frame=0
- in r28, __SP_L__
- #ifdef __AVR_HAVE_SPH__
- in r29, __SP_H__
- .if \n_frame > 63
- subi r28, lo8(-\n_frame)
- sbci r29, hi8(-\n_frame)
- .elseif \n_frame > 0
- adiw r28, \n_frame
- .endif
- #else
- clr r29
- .if \n_frame > 0
- subi r28, lo8(-\n_frame)
- .endif
- #endif /* HAVE SPH */
- ldi r30, \n_pushed
- XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
- .endm
- ;; Support function entry and exit for convenience
- ;; wsubi/waddi: 16-bit immediate subtract/add on a register pair.
- ;; AVR-Tiny has no SBIW/ADIW, so fall back to SUBI/SBCI with negated
- ;; immediates (SUBI/SBCI only exist for r16..r31 — callers must comply).
- .macro wsubi r_arg1, i_arg2
- #if defined (__AVR_TINY__)
- subi \r_arg1, lo8(\i_arg2)
- sbci \r_arg1+1, hi8(\i_arg2)
- #else
- sbiw \r_arg1, \i_arg2
- #endif
- .endm
- .macro waddi r_arg1, i_arg2
- #if defined (__AVR_TINY__)
- subi \r_arg1, lo8(-\i_arg2)
- sbci \r_arg1+1, hi8(-\i_arg2)
- #else
- adiw \r_arg1, \i_arg2
- #endif
- .endm
- ;; DEFUN/ENDF: bracket a global function so it gets .global/.func/.size.
- .macro DEFUN name
- .global \name
- .func \name
- \name:
- .endm
- .macro ENDF name
- .size \name, .-\name
- .endfunc
- .endm
- ;; FALIAS: emit a zero-size alias label for an existing entry point.
- .macro FALIAS name
- .global \name
- .func \name
- \name:
- .size \name, .-\name
- .endfunc
- .endm
- ;; Skip next instruction, typically a jump target
- #define skip cpse 16,16
- ;; Negate a 2-byte value held in consecutive registers
- .macro NEG2 reg
- com \reg+1
- neg \reg
- sbci \reg+1, -1
- .endm
- ;; Negate a 4-byte value held in consecutive registers
- ;; Sets the V flag for signed overflow tests if REG >= 16
- .macro NEG4 reg
- com \reg+3
- com \reg+2
- com \reg+1
- .if \reg >= 16
- neg \reg
- sbci \reg+1, -1
- sbci \reg+2, -1
- sbci \reg+3, -1
- .else
- ;; reg < 16: SBCI is unavailable, so use COM + add-with-carry instead.
- com \reg
- adc \reg, __zero_reg__
- adc \reg+1, __zero_reg__
- adc \reg+2, __zero_reg__
- adc \reg+3, __zero_reg__
- .endif
- .endm
- ;; exp_lo/exp_hi: bytes 2 and 3 of an IEEE single with biased exponent N.
- #define exp_lo(N) hlo8 ((N) << 23)
- #define exp_hi(N) hhi8 ((N) << 23)
- .section .text.libgcc.mul, "ax", @progbits
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
- #if !defined (__AVR_HAVE_MUL__)
- /*******************************************************
- Multiplication 8 x 8 without MUL
- *******************************************************/
- #if defined (L_mulqi3)
- #define r_arg2 r22 /* multiplicand */
- #define r_arg1 r24 /* multiplier */
- #define r_res __tmp_reg__ /* result */
- ;; R24 = R24 * R22 (shift-and-add; terminates early once either
- ;; the shifted multiplicand or the remaining multiplier becomes 0)
- DEFUN __mulqi3
- clr r_res ; clear result
- __mulqi3_loop:
- sbrc r_arg1,0
- add r_res,r_arg2 ; bit 0 of multiplier set --> accumulate
- add r_arg2,r_arg2 ; shift multiplicand
- breq __mulqi3_exit ; while multiplicand != 0
- lsr r_arg1 ;
- brne __mulqi3_loop ; exit if multiplier = 0
- __mulqi3_exit:
- mov r_arg1,r_res ; result to return register
- ret
- ENDF __mulqi3
- #undef r_arg2
- #undef r_arg1
- #undef r_res
-
- #endif /* defined (L_mulqi3) */
- /*******************************************************
- Widening Multiplication 16 = 8 x 8 without MUL
- Multiplication 16 x 16 without MUL
- *******************************************************/
- ;; Register layout shared by the 16-bit software multiply routines below.
- #define A0 22
- #define A1 23
- #define B0 24
- #define BB0 20
- #define B1 25
- ;; Output overlaps input, thus expand result in CC0/1
- #define C0 24
- #define C1 25
- #define CC0 __tmp_reg__
- #define CC1 21
- #if defined (L_umulqihi3)
- ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
- ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
- ;;; Clobbers: __tmp_reg__, R21..R23
- ;; Zero-extend both 8-bit inputs, then reuse the 16 x 16 routine.
- DEFUN __umulqihi3
- clr A1
- clr B1
- XJMP __mulhi3
- ENDF __umulqihi3
- #endif /* L_umulqihi3 */
- #if defined (L_mulqihi3)
- ;;; R25:R24 = (signed int) R22 * (signed int) R24
- ;;; (C1:C0) = (signed int) A0 * (signed int) B0
- ;;; Clobbers: __tmp_reg__, R20..R23
- DEFUN __mulqihi3
- ;; Sign-extend B0
- clr B1
- sbrc B0, 7
- com B1
- ;; The multiplication runs twice as fast if A1 is zero, thus:
- ;; Zero-extend A0
- clr A1
- #ifdef __AVR_HAVE_JMP_CALL__
- ;; Store B0 * sign of A
- clr BB0
- sbrc A0, 7
- mov BB0, B0
- call __mulhi3
- #else /* have no CALL */
- ;; Skip sign-extension of A if A >= 0
- ;; Same size as with the first alternative but avoids errata skip
- ;; and is faster if A >= 0
- sbrs A0, 7
- rjmp __mulhi3
- ;; If A < 0 store B
- mov BB0, B0
- rcall __mulhi3
- #endif /* HAVE_JMP_CALL */
- ;; 1-extend A after the multiplication
- ;; (subtract B << 8 from the product iff A was negative, i.e. BB0 = B0)
- sub C1, BB0
- ret
- ENDF __mulqihi3
- #endif /* L_mulqihi3 */
- #if defined (L_mulhi3)
- ;;; R25:R24 = R23:R22 * R25:R24
- ;;; (C1:C0) = (A1:A0) * (B1:B0)
- ;;; Clobbers: __tmp_reg__, R21..R23
- ;; Classic shift-and-add: for each set bit n of A, add B << n to CC.
- ;; Loops end early as soon as A or the shifted B becomes zero.
- DEFUN __mulhi3
- ;; Clear result
- clr CC0
- clr CC1
- rjmp 3f
- 1:
- ;; Bit n of A is 1 --> C += B << n
- add CC0, B0
- adc CC1, B1
- 2:
- lsl B0
- rol B1
- 3:
- ;; If B == 0 we are ready
- wsubi B0, 0
- breq 9f
- ;; Carry = n-th bit of A
- lsr A1
- ror A0
- ;; If bit n of A is set, then go add B * 2^n to C
- brcs 1b
- ;; Carry = 0 --> The ROR above acts like CP A0, 0
- ;; Thus, it is sufficient to CPC the high part to test A against 0
- cpc A1, __zero_reg__
- ;; Only proceed if A != 0
- brne 2b
- 9:
- ;; Move Result into place
- mov C0, CC0
- mov C1, CC1
- ret
- ENDF __mulhi3
- #endif /* L_mulhi3 */
- #undef A0
- #undef A1
- #undef B0
- #undef BB0
- #undef B1
- #undef C0
- #undef C1
- #undef CC0
- #undef CC1
- ;; Register layout for the 32-bit software multiply routines below.
- #define A0 22
- #define A1 A0+1
- #define A2 A0+2
- #define A3 A0+3
- #define B0 18
- #define B1 B0+1
- #define B2 B0+2
- #define B3 B0+3
- #define CC0 26
- #define CC1 CC0+1
- #define CC2 30
- #define CC3 CC2+1
- #define C0 22
- #define C1 C0+1
- #define C2 C0+2
- #define C3 C0+3
- /*******************************************************
- Widening Multiplication 32 = 16 x 16 without MUL
- *******************************************************/
- #if defined (L_umulhisi3)
- ;; Zero-extend both 16-bit inputs into 32 bits, then use __mulsi3.
- DEFUN __umulhisi3
- wmov B0, 24
- ;; Zero-extend B
- clr B2
- clr B3
- ;; Zero-extend A
- wmov A2, B2
- XJMP __mulsi3
- ENDF __umulhisi3
- #endif /* L_umulhisi3 */
- #if defined (L_mulhisi3)
- ;; Widening signed 32 = 16 x 16 without MUL.
- DEFUN __mulhisi3
- wmov B0, 24
- ;; Sign-extend B
- ;; (shift B's sign bit into carry, then SBC yields 0x00 or 0xff)
- lsl r25
- sbc B2, B2
- mov B3, B2
- #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
- ;; Sign-extend A
- clr A2
- sbrc A1, 7
- com A2
- mov A3, A2
- XJMP __mulsi3
- #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
- ;; Zero-extend A and __mulsi3 will run at least twice as fast
- ;; compared to a sign-extended A.
- clr A2
- clr A3
- sbrs A1, 7
- XJMP __mulsi3
- ;; If A < 0 then perform the B * 0xffff.... before the
- ;; very multiplication by initializing the high part of the
- ;; result CC with -B.
- wmov CC2, A2
- sub CC2, B0
- sbc CC3, B1
- XJMP __mulsi3_helper
- #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
- ENDF __mulhisi3
- #endif /* L_mulhisi3 */
- /*******************************************************
- Multiplication 32 x 32 without MUL
- *******************************************************/
- #if defined (L_mulsi3)
- DEFUN __mulsi3
- #if defined (__AVR_TINY__)
- ;; AVR-Tiny passes B2/B3 on the stack; fetch them via X.
- in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
- in r27, __SP_H__
- subi r26, lo8(-3) ; Add 3 to point past return address
- sbci r27, hi8(-3)
- push B0 ; save callee saved regs
- push B1
- ld B0, X+ ; load from caller stack
- ld B1, X+
- ld B2, X+
- ld B3, X
- #endif
- ;; Clear result
- clr CC2
- clr CC3
- ;; FALLTHRU
- ENDF __mulsi3
- ;; Shift-and-add core; entered directly by __mulhisi3 with CC2/CC3
- ;; pre-initialized to compensate for a negative A.
- DEFUN __mulsi3_helper
- clr CC0
- clr CC1
- rjmp 3f
- 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
- ;; CC += B
- add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
- 2: ;; B <<= 1
- lsl B0 $ rol B1 $ rol B2 $ rol B3
- 3: ;; A >>= 1: Carry = n-th bit of A
- lsr A3 $ ror A2 $ ror A1 $ ror A0
- brcs 1b
- ;; Only continue if A != 0
- ;; (carry is known 0 here, so SBCI A1,0 just tests A1:A0 via Z flag)
- sbci A1, 0
- brne 2b
- wsubi A2, 0
- brne 2b
- ;; All bits of A are consumed: Copy result to return register C
- wmov C0, CC0
- wmov C2, CC2
- #if defined (__AVR_TINY__)
- pop B1 ; restore callee saved regs
- pop B0
- #endif /* defined (__AVR_TINY__) */
- ret
- ENDF __mulsi3_helper
- #endif /* L_mulsi3 */
- #undef A0
- #undef A1
- #undef A2
- #undef A3
- #undef B0
- #undef B1
- #undef B2
- #undef B3
- #undef C0
- #undef C1
- #undef C2
- #undef C3
- #undef CC0
- #undef CC1
- #undef CC2
- #undef CC3
- #endif /* !defined (__AVR_HAVE_MUL__) */
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- #if defined (__AVR_HAVE_MUL__)
- ;; Register layout for the MUL-based widening 16 x 16 routines.
- #define A0 26
- #define B0 18
- #define C0 22
- #define A1 A0+1
- #define B1 B0+1
- #define B2 B0+2
- #define B3 B0+3
- #define C1 C0+1
- #define C2 C0+2
- #define C3 C0+3
- /*******************************************************
- Widening Multiplication 32 = 16 x 16 with MUL
- *******************************************************/
- #if defined (L_mulhisi3)
- ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
- ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
- ;;; Clobbers: __tmp_reg__
- ;; Unsigned product first, then correct the high word for each
- ;; negative operand (subtract the other operand << 16).
- DEFUN __mulhisi3
- XCALL __umulhisi3
- ;; Sign-extend B
- tst B1
- brpl 1f
- sub C2, A0
- sbc C3, A1
- 1: ;; Sign-extend A
- XJMP __usmulhisi3_tail
- ENDF __mulhisi3
- #endif /* L_mulhisi3 */
- #if defined (L_usmulhisi3)
- ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
- ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
- ;;; Clobbers: __tmp_reg__
- DEFUN __usmulhisi3
- XCALL __umulhisi3
- ;; FALLTHRU
- ENDF __usmulhisi3
- ;; Shared tail: if A was negative, subtract B << 16 from the product.
- DEFUN __usmulhisi3_tail
- ;; Sign-extend A
- sbrs A1, 7
- ret
- sub C2, B0
- sbc C3, B1
- ret
- ENDF __usmulhisi3_tail
- #endif /* L_usmulhisi3 */
- #if defined (L_umulhisi3)
- ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
- ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
- ;;; Clobbers: __tmp_reg__
- ;; Schoolbook 16 x 16: A0*B0 and A1*B1 fill the result, then the two
- ;; cross products A0*B1 and A1*B0 are added into the middle bytes.
- DEFUN __umulhisi3
- mul A0, B0
- movw C0, r0
- mul A1, B1
- movw C2, r0
- mul A0, B1
- #ifdef __AVR_HAVE_JMP_CALL__
- ;; This function is used by many other routines, often multiple times.
- ;; Therefore, if the flash size is not too limited, avoid the RCALL
- ;; and invest 6 Bytes to speed things up.
- add C1, r0
- adc C2, r1
- clr __zero_reg__
- adc C3, __zero_reg__
- #else
- ;; Small flash: run the accumulate sequence below twice via RCALL.
- rcall 1f
- #endif
- mul A1, B0
- 1: add C1, r0
- adc C2, r1
- clr __zero_reg__
- adc C3, __zero_reg__
- ret
- ENDF __umulhisi3
- #endif /* L_umulhisi3 */
- /*******************************************************
- Widening Multiplication 32 = 16 x 32 with MUL
- *******************************************************/
- #if defined (L_mulshisi3)
- ;;; R25:R22 = (signed long) R27:R26 * R21:R18
- ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
- ;;; Clobbers: __tmp_reg__
- DEFUN __mulshisi3
- #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
- ;; Some cores have problem skipping 2-word instruction
- tst A1
- brmi __mulohisi3
- #else
- ;; A >= 0: skip the one-extension and go straight to the
- ;; unsigned variant (falls through to __mulohisi3 if A < 0).
- sbrs A1, 7
- #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
- XJMP __muluhisi3
- ;; FALLTHRU
- ENDF __mulshisi3
- ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
- ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
- ;;; Clobbers: __tmp_reg__
- DEFUN __mulohisi3
- XCALL __muluhisi3
- ;; One-extend R27:R26 (A1:A0)
- sub C2, B0
- sbc C3, B1
- ret
- ENDF __mulohisi3
- #endif /* L_mulshisi3 */
- #if defined (L_muluhisi3)
- ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
- ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
- ;;; Clobbers: __tmp_reg__
- ;; 32 = 16 x 32: low 32 bits of the product.  Partial products that
- ;; only affect byte 3 add just r0; their r1 would overflow out.
- DEFUN __muluhisi3
- XCALL __umulhisi3
- mul A0, B3
- add C3, r0
- mul A1, B2
- add C3, r0
- mul A0, B2
- add C2, r0
- adc C3, r1
- clr __zero_reg__
- ret
- ENDF __muluhisi3
- #endif /* L_muluhisi3 */
- /*******************************************************
- Multiplication 32 x 32 with MUL
- *******************************************************/
- #if defined (L_mulsi3)
- ;;; R25:R22 = R25:R22 * R21:R18
- ;;; (C3:C0) = C3:C0 * B3:B0
- ;;; Clobbers: R26, R27, __tmp_reg__
- ;; Low word of A goes to A1:A0 for __muluhisi3; the high word is
- ;; parked on the stack across the call, then its cross products
- ;; (affecting only C3:C2 of a 32-bit result) are accumulated.
- DEFUN __mulsi3
- movw A0, C0
- push C2
- push C3
- XCALL __muluhisi3
- pop A1
- pop A0
- ;; A1:A0 now contains the high word of A
- mul A0, B0
- add C2, r0
- adc C3, r1
- mul A0, B1
- add C3, r0
- mul A1, B0
- add C3, r0
- clr __zero_reg__
- ret
- ENDF __mulsi3
- #endif /* L_mulsi3 */
- #undef A0
- #undef A1
- #undef B0
- #undef B1
- #undef B2
- #undef B3
- #undef C0
- #undef C1
- #undef C2
- #undef C3
- #endif /* __AVR_HAVE_MUL__ */
- #if defined (L_mulpsi3)
- ;; A[0..2]: In: Multiplicand; Out: Product
- #define A0 22
- #define A1 A0+1
- #define A2 A0+2
- ;; B[0..2]: In: Multiplier
- #define B0 18
- #define B1 B0+1
- #define B2 B0+2
- #if defined (__AVR_HAVE_MUL__)
- ;; C[0..2]: Expand Result
- #define C0 22
- #define C1 C0+1
- #define C2 C0+2
- ;; R24:R22 *= R20:R18
- ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
- #define AA0 26
- #define AA2 21
- DEFUN __mulpsi3
- wmov AA0, A0
- mov AA2, A2
- XCALL __umulhisi3
- mul AA2, B0 $ add C2, r0
- mul AA0, B2 $ add C2, r0
- clr __zero_reg__
- ret
- ENDF __mulpsi3
- #undef AA2
- #undef AA0
- #undef C2
- #undef C1
- #undef C0
- #else /* !HAVE_MUL */
- ;; C[0..2]: Expand Result
- #if defined (__AVR_TINY__)
- #define C0 16
- #else
- #define C0 0
- #endif /* defined (__AVR_TINY__) */
- #define C1 C0+1
- #define C2 21
- ;; R24:R22 *= R20:R18
- ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
- DEFUN __mulpsi3
- #if defined (__AVR_TINY__)
- in r26,__SP_L__
- in r27,__SP_H__
- subi r26, lo8(-3) ; Add 3 to point past return address
- sbci r27, hi8(-3)
- push B0 ; save callee saved regs
- push B1
- ld B0,X+ ; load from caller stack
- ld B1,X+
- ld B2,X+
- #endif /* defined (__AVR_TINY__) */
- ;; C[] = 0
- clr __tmp_reg__
- clr C2
- 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
- LSR B2 $ ror B1 $ ror B0
- ;; If the N-th Bit of B[] was set...
- brcc 1f
- ;; ...then add A[] * 2^N to the Result C[]
- ADD C0,A0 $ adc C1,A1 $ adc C2,A2
- 1: ;; Multiply A[] by 2
- LSL A0 $ rol A1 $ rol A2
- ;; Loop until B[] is 0
- subi B0,0 $ sbci B1,0 $ sbci B2,0
- brne 0b
- ;; Copy C[] to the return Register A[]
- wmov A0, C0
- mov A2, C2
- clr __zero_reg__
- #if defined (__AVR_TINY__)
- pop B1
- pop B0
- #endif /* (__AVR_TINY__) */
- ret
- ENDF __mulpsi3
- #undef C2
- #undef C1
- #undef C0
- #endif /* HAVE_MUL */
- #undef B2
- #undef B1
- #undef B0
- #undef A2
- #undef A1
- #undef A0
- #endif /* L_mulpsi3 */
- #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
- ;; A[0..2]: In: Multiplicand
- #define A0 22
- #define A1 A0+1
- #define A2 A0+2
- ;; BB: In: Multiplier
- #define BB 25
- ;; C[0..2]: Result
- #define C0 18
- #define C1 C0+1
- #define C2 C0+2
- ;; C[] = A[] * sign_extend (BB)
- ;; 24-bit x signed 8-bit product, low 24 bits.
- DEFUN __mulsqipsi3
- mul A0, BB
- movw C0, r0
- mul A2, BB
- mov C2, r0
- mul A1, BB
- add C1, r0
- adc C2, r1
- clr __zero_reg__
- sbrs BB, 7
- ret
- ;; One-extend BB
- ;; (BB < 0: subtract A << 8, i.e. treat BB's high bits as all-ones)
- sub C1, A0
- sbc C2, A1
- ret
- ENDF __mulsqipsi3
- #undef C2
- #undef C1
- #undef C0
- #undef BB
- #undef A2
- #undef A1
- #undef A0
- #endif /* L_mulsqipsi3 && HAVE_MUL */
- /*******************************************************
- Multiplication 64 x 64
- *******************************************************/
- ;; A[] = A[] * B[]
- ;; A[0..7]: In: Multiplicand
- ;; Out: Product
- #define A0 18
- #define A1 A0+1
- #define A2 A0+2
- #define A3 A0+3
- #define A4 A0+4
- #define A5 A0+5
- #define A6 A0+6
- #define A7 A0+7
- ;; B[0..7]: In: Multiplier
- #define B0 10
- #define B1 B0+1
- #define B2 B0+2
- #define B3 B0+3
- #define B4 B0+4
- #define B5 B0+5
- #define B6 B0+6
- #define B7 B0+7
- #ifndef __AVR_TINY__
- #if defined (__AVR_HAVE_MUL__)
- ;; Define C[] for convenience
- ;; Notice that parts of C[] overlap A[] respective B[]
- #define C0 16
- #define C1 C0+1
- #define C2 20
- #define C3 C2+1
- #define C4 28
- #define C5 C4+1
- #define C6 C4+2
- #define C7 C4+3
- #if defined (L_muldi3)
- ;; A[] *= B[]
- ;; R25:R18 *= R17:R10
- ;; Ordinary ABI-Function
- ;; 4 x 4 word (16-bit) schoolbook multiply, low 8 bytes of the result.
- ;; High-byte partial products are summed first while C[] registers are
- ;; still free; word products go through __umulhisi3 / __muldi3_6.
- DEFUN __muldi3
- push r29
- push r28
- push r17
- push r16
- ;; Counting in Words, we have to perform a 4 * 4 Multiplication
- ;; 3 * 0 + 0 * 3
- mul A7,B0 $ $ mov C7,r0
- mul A0,B7 $ $ add C7,r0
- mul A6,B1 $ $ add C7,r0
- mul A6,B0 $ mov C6,r0 $ add C7,r1
- mul B6,A1 $ $ add C7,r0
- mul B6,A0 $ add C6,r0 $ adc C7,r1
- ;; 1 * 2
- mul A2,B4 $ add C6,r0 $ adc C7,r1
- mul A3,B4 $ $ add C7,r0
- mul A2,B5 $ $ add C7,r0
- ;; Park operand words that the word-products below will clobber.
- push A5
- push A4
- push B1
- push B0
- push A3
- push A2
- ;; 0 * 0
- wmov 26, B0
- XCALL __umulhisi3
- wmov C0, 22
- wmov C2, 24
- ;; 0 * 2
- wmov 26, B4
- XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
- wmov 26, B2
- ;; 0 * 1
- XCALL __muldi3_6
- pop A0
- pop A1
- ;; 1 * 1
- wmov 26, B2
- XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
- pop r26
- pop r27
- ;; 1 * 0
- XCALL __muldi3_6
- pop A0
- pop A1
- ;; 2 * 0
- XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
- ;; 2 * 1
- wmov 26, B2
- XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
- ;; A[] = C[]
- wmov A0, C0
- ;; A2 = C2 already
- wmov A4, C4
- wmov A6, C6
- pop r16
- pop r17
- pop r28
- pop r29
- ret
- ENDF __muldi3
- #endif /* L_muldi3 */
- #if defined (L_muldi3_6)
- ;; A helper for some 64-bit multiplications with MUL available
- DEFUN __muldi3_6
- __muldi3_6:
- ;; C[2..5] += R27:R26 * R19:R18 (via __umulhisi3, Result in R25:R22),
- ;; propagating a final Carry into C7:C6 by means of ADIW.
- XCALL __umulhisi3
- add C2, 22
- adc C3, 23
- adc C4, 24
- adc C5, 25
- brcc 0f
- adiw C6, 1
- 0: ret
- ENDF __muldi3_6
- #endif /* L_muldi3_6 */
- #undef C7
- #undef C6
- #undef C5
- #undef C4
- #undef C3
- #undef C2
- #undef C1
- #undef C0
- #else /* !HAVE_MUL */
- #if defined (L_muldi3)
- #define C0 26
- #define C1 C0+1
- #define C2 C0+2
- #define C3 C0+3
- #define C4 C0+4
- #define C5 C0+5
- #define C6 0
- #define C7 C6+1
- #define Loop 9
- ;; A[] *= B[]
- ;; R25:R18 *= R17:R10
- ;; Ordinary ABI-Function
- DEFUN __muldi3
- ;; Classic 64-Bit Shift-and-Add Multiplication (no MUL Instruction).
- ;; Save Y and the Register used as Loop Counter (R9).
- push r29
- push r28
- push Loop
- ldi C0, 64
- mov Loop, C0
- ;; C[] = 0
- clr __tmp_reg__
- wmov C0, 0
- wmov C2, 0
- wmov C4, 0
- 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
- ;; where N = 64 - Loop.
- ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
- ;; B[] will have its initial Value again.
- LSR B7 $ ror B6 $ ror B5 $ ror B4
- ror B3 $ ror B2 $ ror B1 $ ror B0
- ;; If the N-th Bit of B[] was set then...
- brcc 1f
- ;; ...finish Rotation...
- ori B7, 1 << 7
- ;; ...and add A[] * 2^N to the Result C[]
- ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
- adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
- 1: ;; Multiply A[] by 2
- LSL A0 $ rol A1 $ rol A2 $ rol A3
- rol A4 $ rol A5 $ rol A6 $ rol A7
- dec Loop
- brne 0b
- ;; We expanded the Result in C[]
- ;; Copy Result to the Return Register A[]
- wmov A0, C0
- wmov A2, C2
- wmov A4, C4
- wmov A6, C6
- ;; C6/C7 live in R1:R0; restore __zero_reg__ (R1) to 0 for the ABI
- clr __zero_reg__
- pop Loop
- pop r28
- pop r29
- ret
- ENDF __muldi3
- #undef Loop
- #undef C7
- #undef C6
- #undef C5
- #undef C4
- #undef C3
- #undef C2
- #undef C1
- #undef C0
- #endif /* L_muldi3 */
- #endif /* HAVE_MUL */
- #endif /* if not __AVR_TINY__ */
- #undef B7
- #undef B6
- #undef B5
- #undef B4
- #undef B3
- #undef B2
- #undef B1
- #undef B0
- #undef A7
- #undef A6
- #undef A5
- #undef A4
- #undef A3
- #undef A2
- #undef A1
- #undef A0
- /*******************************************************
- Widening Multiplication 64 = 32 x 32 with MUL
- *******************************************************/
- #if defined (__AVR_HAVE_MUL__)
- #define A0 r22
- #define A1 r23
- #define A2 r24
- #define A3 r25
-
- #define B0 r18
- #define B1 r19
- #define B2 r20
- #define B3 r21
-
- #define C0 18
- #define C1 C0+1
- #define C2 20
- #define C3 C2+1
- #define C4 28
- #define C5 C4+1
- #define C6 C4+2
- #define C7 C4+3
- #if defined (L_umulsidi3)
- ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
- ;; R18[8] = R22[4] * R18[4]
- ;;
- ;; Ordinary ABI Function, but additionally sets
- ;; X = R20[2] = B2[2]
- ;; Z = R22[2] = A0[2]
- DEFUN __umulsidi3
- ;; Unsigned Entry: clear T so the Helper performs no Sign Fixup.
- clt
- ;; FALLTHRU
- ENDF __umulsidi3
- ;; T = sign (A)
- DEFUN __umulsidi3_helper
- push 29 $ push 28 ; Y
- wmov 30, A2
- ;; Counting in Words, we have to perform 4 Multiplications
- ;; 0 * 0
- wmov 26, A0
- XCALL __umulhisi3
- push 23 $ push 22 ; C0
- wmov 28, B0
- wmov 18, B2
- wmov C2, 24
- push 27 $ push 26 ; A0
- push 19 $ push 18 ; B2
- ;;
- ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
- ;; B2 C2 -- -- -- B0 A2
- ;; 1 * 1
- wmov 26, 30 ; A2
- XCALL __umulhisi3
- ;; Sign-extend A. T holds the sign of A
- brtc 0f
- ;; Subtract B from the high part of the result
- ;; (Correction Term B * 2^32 for a negative A)
- sub 22, 28
- sbc 23, 29
- sbc 24, 18
- sbc 25, 19
- 0: wmov 18, 28 ;; B0
- wmov C4, 22
- wmov C6, 24
- ;;
- ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
- ;; B0 C2 -- -- A2 C4 C6
- ;;
- ;; 1 * 0
- XCALL __muldi3_6
- ;; 0 * 1
- pop 26 $ pop 27 ;; B2
- pop 18 $ pop 19 ;; A0
- XCALL __muldi3_6
- ;; Move result C into place and save A0 in Z
- wmov 22, C4
- wmov 24, C6
- wmov 30, 18 ; A0
- pop C0 $ pop C1
- ;; Epilogue
- pop 28 $ pop 29 ;; Y
- ret
- ENDF __umulsidi3_helper
- #endif /* L_umulsidi3 */
- #if defined (L_mulsidi3)
- ;; Signed widening 64 = 32 * 32 Multiplication
- ;;
- ;; R18[8] = R22[4] * R18[4]
- ;; Ordinary ABI Function
- DEFUN __mulsidi3
- ;; T := Sign of A; if B >= 0 the Helper's T-Fixup for A suffices.
- bst A3, 7
- sbrs B3, 7 ; Enhanced core has no skip bug
- XJMP __umulsidi3_helper
- ;; B needs sign-extension
- push A3
- push A2
- XCALL __umulsidi3_helper
- ;; Subtract Correction Term A * 2^32 because B was negative.
- ;; A0 survived in Z
- sub r22, r30
- sbc r23, r31
- pop r26
- pop r27
- sbc r24, r26
- sbc r25, r27
- ret
- ENDF __mulsidi3
- #endif /* L_mulsidi3 */
- #undef A0
- #undef A1
- #undef A2
- #undef A3
- #undef B0
- #undef B1
- #undef B2
- #undef B3
- #undef C0
- #undef C1
- #undef C2
- #undef C3
- #undef C4
- #undef C5
- #undef C6
- #undef C7
- #endif /* HAVE_MUL */
- /**********************************************************
- Widening Multiplication 64 = 32 x 32 without MUL
- **********************************************************/
- #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
- #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
- #define A0 18
- #define A1 A0+1
- #define A2 A0+2
- #define A3 A0+3
- #define A4 A0+4
- #define A5 A0+5
- #define A6 A0+6
- #define A7 A0+7
- #define B0 10
- #define B1 B0+1
- #define B2 B0+2
- #define B3 B0+3
- #define B4 B0+4
- #define B5 B0+5
- #define B6 B0+6
- #define B7 B0+7
- #define AA0 22
- #define AA1 AA0+1
- #define AA2 AA0+2
- #define AA3 AA0+3
- #define BB0 18
- #define BB1 BB0+1
- #define BB2 BB0+2
- #define BB3 BB0+3
- #define Mask r30
- ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
- ;;
- ;; R18[8] = R22[4] * R18[4]
- ;; Ordinary ABI Function
- DEFUN __mulsidi3
- ;; Signed Entry: T = 1 selects Sign-Extension below; `skip` jumps
- ;; over the unsigned Entry's CLT.
- set
- skip
- ;; FALLTHRU
- ENDF __mulsidi3
- DEFUN __umulsidi3
- clt ; skipped
- ;; Save 10 Registers: R10..R17, R28, R29
- do_prologue_saves 10
- ;; Mask = 0xff (unsigned) or 0x7f (signed): strips the Sign Bit so
- ;; LSL + SBC below produces the correct high Extension Bytes.
- ldi Mask, 0xff
- bld Mask, 7
- ;; Move B into place...
- wmov B0, BB0
- wmov B2, BB2
- ;; ...and extend it
- and BB3, Mask
- lsl BB3
- sbc B4, B4
- mov B5, B4
- wmov B6, B4
- ;; Move A into place...
- wmov A0, AA0
- wmov A2, AA2
- ;; ...and extend it
- and AA3, Mask
- lsl AA3
- sbc A4, A4
- mov A5, A4
- wmov A6, A4
- ;; 64 x 64 Multiplication yields the widened 32 x 32 Product
- XCALL __muldi3
- do_epilogue_restores 10
- ENDF __umulsidi3
- #undef A0
- #undef A1
- #undef A2
- #undef A3
- #undef A4
- #undef A5
- #undef A6
- #undef A7
- #undef B0
- #undef B1
- #undef B2
- #undef B3
- #undef B4
- #undef B5
- #undef B6
- #undef B7
- #undef AA0
- #undef AA1
- #undef AA2
- #undef AA3
- #undef BB0
- #undef BB1
- #undef BB2
- #undef BB3
- #undef Mask
- #endif /* L_mulsidi3 && !HAVE_MUL */
- #endif /* if not __AVR_TINY__ */
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- .section .text.libgcc.div, "ax", @progbits
- /*******************************************************
- Division 8 / 8 => (result + remainder)
- *******************************************************/
- #define r_rem r25 /* remainder */
- #define r_arg1 r24 /* dividend, quotient */
- #define r_arg2 r22 /* divisor */
- #define r_cnt r23 /* loop count */
- #if defined (L_udivmodqi4)
- DEFUN __udivmodqi4
- ;; 8/8 unsigned Division: R24 = R24 / R22, R25 = R24 % R22.
- ;; Restoring-style Loop; Quotient Bits are collected complemented
- ;; and fixed up with COM at the End.
- sub r_rem,r_rem ; clear remainder and carry
- ldi r_cnt,9 ; init loop counter
- rjmp __udivmodqi4_ep ; jump to entry point
- __udivmodqi4_loop:
- rol r_rem ; shift dividend into remainder
- cp r_rem,r_arg2 ; compare remainder & divisor
- brcs __udivmodqi4_ep ; remainder < divisor
- sub r_rem,r_arg2 ; subtract divisor from remainder
- __udivmodqi4_ep:
- rol r_arg1 ; shift dividend (with CARRY)
- dec r_cnt ; decrement loop counter
- brne __udivmodqi4_loop
- com r_arg1 ; complement result
- ; because C flag was complemented in loop
- ret
- ENDF __udivmodqi4
- #endif /* defined (L_udivmodqi4) */
- #if defined (L_divmodqi4)
- DEFUN __divmodqi4
- ;; Signed 8/8 Division: negate Operands as needed, divide unsigned,
- ;; then restore Signs. T = Sign of Dividend = Sign of Remainder.
- bst r_arg1,7 ; store sign of dividend
- mov __tmp_reg__,r_arg1
- eor __tmp_reg__,r_arg2; r0.7 is sign of result
- sbrc r_arg1,7
- neg r_arg1 ; dividend negative : negate
- sbrc r_arg2,7
- neg r_arg2 ; divisor negative : negate
- XCALL __udivmodqi4 ; do the unsigned div/mod
- brtc __divmodqi4_1
- neg r_rem ; correct remainder sign
- __divmodqi4_1:
- sbrc __tmp_reg__,7
- neg r_arg1 ; correct result sign
- __divmodqi4_exit:
- ret
- ENDF __divmodqi4
- #endif /* defined (L_divmodqi4) */
- #undef r_rem
- #undef r_arg1
- #undef r_arg2
- #undef r_cnt
-
-
- /*******************************************************
- Division 16 / 16 => (result + remainder)
- *******************************************************/
- #define r_remL r26 /* remainder Low */
- #define r_remH r27 /* remainder High */
- /* return: remainder */
- #define r_arg1L r24 /* dividend Low */
- #define r_arg1H r25 /* dividend High */
- /* return: quotient */
- #define r_arg2L r22 /* divisor Low */
- #define r_arg2H r23 /* divisor High */
-
- #define r_cnt r21 /* loop count */
- #if defined (L_udivmodhi4)
- DEFUN __udivmodhi4
- ;; 16/16 unsigned Division: R23:R22 = Quotient, R25:R24 = Remainder.
- sub r_remL,r_remL
- sub r_remH,r_remH ; clear remainder and carry
- ldi r_cnt,17 ; init loop counter
- rjmp __udivmodhi4_ep ; jump to entry point
- __udivmodhi4_loop:
- rol r_remL ; shift dividend into remainder
- rol r_remH
- cp r_remL,r_arg2L ; compare remainder & divisor
- cpc r_remH,r_arg2H
- brcs __udivmodhi4_ep ; remainder < divisor
- sub r_remL,r_arg2L ; subtract divisor from remainder
- sbc r_remH,r_arg2H
- __udivmodhi4_ep:
- rol r_arg1L ; shift dividend (with CARRY)
- rol r_arg1H
- dec r_cnt ; decrement loop counter
- brne __udivmodhi4_loop
- com r_arg1L ; complement quotient: C flag was
- com r_arg1H ; complemented in the loop
- ; div/mod results to return registers, as for the div() function
- mov_l r_arg2L, r_arg1L ; quotient
- mov_h r_arg2H, r_arg1H
- mov_l r_arg1L, r_remL ; remainder
- mov_h r_arg1H, r_remH
- ret
- ENDF __udivmodhi4
- #endif /* defined (L_udivmodhi4) */
- #if defined (L_divmodhi4)
- DEFUN __divmodhi4
- ;; Signed 16/16 Division; also serves as libc's div().
- .global _div
- _div:
- bst r_arg1H,7 ; store sign of dividend
- mov __tmp_reg__,r_arg2H
- brtc 0f
- com __tmp_reg__ ; r0.7 is sign of result
- rcall __divmodhi4_neg1 ; dividend negative: negate
- 0:
- sbrc r_arg2H,7
- rcall __divmodhi4_neg2 ; divisor negative: negate
- XCALL __udivmodhi4 ; do the unsigned div/mod
- sbrc __tmp_reg__,7
- rcall __divmodhi4_neg2 ; correct remainder sign
- brtc __divmodhi4_exit
- ;; Falls through into neg1 when T is set: negates the remainder
- ;; (now in r_arg1) and returns via neg1's RET.
- __divmodhi4_neg1:
- ;; correct dividend/remainder sign
- com r_arg1H
- neg r_arg1L
- sbci r_arg1H,0xff
- ret
- __divmodhi4_neg2:
- ;; correct divisor/result sign
- com r_arg2H
- neg r_arg2L
- sbci r_arg2H,0xff
- __divmodhi4_exit:
- ret
- ENDF __divmodhi4
- #endif /* defined (L_divmodhi4) */
- #undef r_remH
- #undef r_remL
- #undef r_arg1H
- #undef r_arg1L
- #undef r_arg2H
- #undef r_arg2L
-
- #undef r_cnt
- /*******************************************************
- Division 24 / 24 => (result + remainder)
- *******************************************************/
- ;; A[0..2]: In: Dividend; Out: Quotient
- #define A0 22
- #define A1 A0+1
- #define A2 A0+2
- ;; B[0..2]: In: Divisor; Out: Remainder
- #define B0 18
- #define B1 B0+1
- #define B2 B0+2
- ;; C[0..2]: Expand remainder
- #define C0 __zero_reg__
- #define C1 26
- #define C2 25
- ;; Loop counter
- #define r_cnt 21
- #if defined (L_udivmodpsi4)
- ;; R24:R22 = R24:R22 udiv R20:R18
- ;; R20:R18 = R24:R22 umod R20:R18
- ;; Clobbers: R21, R25, R26
- DEFUN __udivmodpsi4
- ;; 24/24 unsigned Division. C0 is __zero_reg__, used as scratch
- ;; here and restored to 0 before returning.
- ; init loop counter
- ldi r_cnt, 24+1
- ; Clear remainder and carry. C0 is already 0
- clr C1
- sub C2, C2
- ; jump to entry point
- rjmp __udivmodpsi4_start
- __udivmodpsi4_loop:
- ; shift dividend into remainder
- rol C0
- rol C1
- rol C2
- ; compare remainder & divisor
- cp C0, B0
- cpc C1, B1
- cpc C2, B2
- brcs __udivmodpsi4_start ; remainder < divisor
- sub C0, B0 ; subtract divisor from remainder
- sbc C1, B1
- sbc C2, B2
- __udivmodpsi4_start:
- ; shift dividend (with CARRY)
- rol A0
- rol A1
- rol A2
- ; decrement loop counter
- dec r_cnt
- brne __udivmodpsi4_loop
- ; complement quotient: C flag was complemented in the loop
- com A0
- com A1
- com A2
- ; div/mod results to return registers
- ; remainder
- mov B0, C0
- mov B1, C1
- mov B2, C2
- clr __zero_reg__ ; C0
- ret
- ENDF __udivmodpsi4
- #endif /* defined (L_udivmodpsi4) */
- #if defined (L_divmodpsi4)
- ;; R24:R22 = R24:R22 div R20:R18
- ;; R20:R18 = R24:R22 mod R20:R18
- ;; Clobbers: T, __tmp_reg__, R21, R25, R26
- DEFUN __divmodpsi4
- ;; Signed 24/24 Division; T = Sign of Dividend = Sign of Remainder.
- ; R0.7 will contain the sign of the result:
- ; R0.7 = A.sign ^ B.sign
- mov __tmp_reg__, B2
- ; T-flag = sign of dividend
- bst A2, 7
- brtc 0f
- com __tmp_reg__
- ; Adjust dividend's sign
- rcall __divmodpsi4_negA
- 0:
- ; Adjust divisor's sign
- sbrc B2, 7
- rcall __divmodpsi4_negB
- ; Do the unsigned div/mod
- XCALL __udivmodpsi4
- ; Adjust quotient's sign
- sbrc __tmp_reg__, 7
- rcall __divmodpsi4_negA
- ; Adjust remainder's sign
- ; (falls through into negB when T is set; returns via its RET)
- brtc __divmodpsi4_end
- __divmodpsi4_negB:
- ; Correct divisor/remainder sign
- com B2
- com B1
- neg B0
- sbci B1, -1
- sbci B2, -1
- ret
- ; Correct dividend/quotient sign
- __divmodpsi4_negA:
- com A2
- com A1
- neg A0
- sbci A1, -1
- sbci A2, -1
- __divmodpsi4_end:
- ret
- ENDF __divmodpsi4
- #endif /* defined (L_divmodpsi4) */
- #undef A0
- #undef A1
- #undef A2
- #undef B0
- #undef B1
- #undef B2
- #undef C0
- #undef C1
- #undef C2
- #undef r_cnt
- /*******************************************************
- Division 32 / 32 => (result + remainder)
- *******************************************************/
- #define r_remHH r31 /* remainder High */
- #define r_remHL r30
- #define r_remH r27
- #define r_remL r26 /* remainder Low */
- /* return: remainder */
- #define r_arg1HH r25 /* dividend High */
- #define r_arg1HL r24
- #define r_arg1H r23
- #define r_arg1L r22 /* dividend Low */
- /* return: quotient */
- #define r_arg2HH r21 /* divisor High */
- #define r_arg2HL r20
- #define r_arg2H r19
- #define r_arg2L r18 /* divisor Low */
-
- #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
- #if defined (L_udivmodsi4)
- DEFUN __udivmodsi4
- ;; 32/32 unsigned Division. Loop counter lives in __zero_reg__,
- ;; which is 0 again when the loop terminates.
- ldi r_remL, 33 ; init loop counter
- mov r_cnt, r_remL
- sub r_remL,r_remL
- sub r_remH,r_remH ; clear remainder and carry
- mov_l r_remHL, r_remL
- mov_h r_remHH, r_remH
- rjmp __udivmodsi4_ep ; jump to entry point
- __udivmodsi4_loop:
- rol r_remL ; shift dividend into remainder
- rol r_remH
- rol r_remHL
- rol r_remHH
- cp r_remL,r_arg2L ; compare remainder & divisor
- cpc r_remH,r_arg2H
- cpc r_remHL,r_arg2HL
- cpc r_remHH,r_arg2HH
- brcs __udivmodsi4_ep ; remainder < divisor
- sub r_remL,r_arg2L ; subtract divisor from remainder
- sbc r_remH,r_arg2H
- sbc r_remHL,r_arg2HL
- sbc r_remHH,r_arg2HH
- __udivmodsi4_ep:
- rol r_arg1L ; shift dividend (with CARRY)
- rol r_arg1H
- rol r_arg1HL
- rol r_arg1HH
- dec r_cnt ; decrement loop counter
- brne __udivmodsi4_loop
- ; __zero_reg__ now restored (r_cnt == 0)
- com r_arg1L ; complement quotient: C flag
- com r_arg1H ; was complemented in the loop
- com r_arg1HL
- com r_arg1HH
- ; div/mod results to return registers, as for the ldiv() function
- mov_l r_arg2L, r_arg1L ; quotient
- mov_h r_arg2H, r_arg1H
- mov_l r_arg2HL, r_arg1HL
- mov_h r_arg2HH, r_arg1HH
- mov_l r_arg1L, r_remL ; remainder
- mov_h r_arg1H, r_remH
- mov_l r_arg1HL, r_remHL
- mov_h r_arg1HH, r_remHH
- ret
- ENDF __udivmodsi4
- #endif /* defined (L_udivmodsi4) */
- #if defined (L_divmodsi4)
- DEFUN __divmodsi4
- ;; Signed 32/32 Division; T = Sign of Dividend = Sign of Remainder.
- mov __tmp_reg__,r_arg2HH
- bst r_arg1HH,7 ; store sign of dividend
- brtc 0f
- com __tmp_reg__ ; r0.7 is sign of result
- XCALL __negsi2 ; dividend negative: negate
- 0:
- sbrc r_arg2HH,7
- rcall __divmodsi4_neg2 ; divisor negative: negate
- XCALL __udivmodsi4 ; do the unsigned div/mod
- sbrc __tmp_reg__, 7 ; correct quotient sign
- rcall __divmodsi4_neg2
- brtc __divmodsi4_exit ; correct remainder sign
- XJMP __negsi2 ; tail-call: negate remainder in R25:R22
- __divmodsi4_neg2:
- ;; correct divisor/quotient sign
- com r_arg2HH
- com r_arg2HL
- com r_arg2H
- neg r_arg2L
- sbci r_arg2H,0xff
- sbci r_arg2HL,0xff
- sbci r_arg2HH,0xff
- __divmodsi4_exit:
- ret
- ENDF __divmodsi4
- #endif /* defined (L_divmodsi4) */
- #if defined (L_negsi2)
- ;; (set (reg:SI 22)
- ;; (neg:SI (reg:SI 22)))
- ;; Sets the V flag for signed overflow tests
- DEFUN __negsi2
- ;; R25:R22 = -R25:R22 (NEG4 macro negates 4 consecutive registers)
- NEG4 22
- ret
- ENDF __negsi2
- #endif /* L_negsi2 */
- #undef r_remHH
- #undef r_remHL
- #undef r_remH
- #undef r_remL
- #undef r_arg1HH
- #undef r_arg1HL
- #undef r_arg1H
- #undef r_arg1L
- #undef r_arg2HH
- #undef r_arg2HL
- #undef r_arg2H
- #undef r_arg2L
- #undef r_cnt
- /* *di routines use registers below R19 and won't work with tiny arch
- right now. */
- #if !defined (__AVR_TINY__)
- /*******************************************************
- Division 64 / 64
- Modulo 64 % 64
- *******************************************************/
- ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
- ;; at least 16k of Program Memory. For smaller Devices, depend
- ;; on MOVW and SP Size. There is a Connexion between SP Size and
- ;; Flash Size so that SP Size can be used to test for Flash Size.
- #if defined (__AVR_HAVE_JMP_CALL__)
- # define SPEED_DIV 8
- #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
- # define SPEED_DIV 16
- #else
- # define SPEED_DIV 0
- #endif
- ;; A[0..7]: In: Dividend;
- ;; Out: Quotient (T = 0)
- ;; Out: Remainder (T = 1)
- #define A0 18
- #define A1 A0+1
- #define A2 A0+2
- #define A3 A0+3
- #define A4 A0+4
- #define A5 A0+5
- #define A6 A0+6
- #define A7 A0+7
- ;; B[0..7]: In: Divisor; Out: Clobber
- #define B0 10
- #define B1 B0+1
- #define B2 B0+2
- #define B3 B0+3
- #define B4 B0+4
- #define B5 B0+5
- #define B6 B0+6
- #define B7 B0+7
- ;; C[0..7]: Expand remainder; Out: Remainder (unused)
- #define C0 8
- #define C1 C0+1
- #define C2 30
- #define C3 C2+1
- #define C4 28
- #define C5 C4+1
- #define C6 26
- #define C7 C6+1
- ;; Holds Signs during Division Routine
- #define SS __tmp_reg__
- ;; Bit-Counter in Division Routine
- #define R_cnt __zero_reg__
- ;; Scratch Register for Negation
- #define NN r31
- #if defined (L_udivdi3)
- ;; R25:R18 = R24:R18 umod R17:R10
- ;; Ordinary ABI-Function
- DEFUN __umoddi3
- ;; T = 1 selects "return the Remainder" inside __udivmod64.
- set
- rjmp __udivdi3_umoddi3
- ENDF __umoddi3
- ;; R25:R18 = R24:R18 udiv R17:R10
- ;; Ordinary ABI-Function
- DEFUN __udivdi3
- ;; T = 0 selects "return the Quotient"; falls through.
- clt
- ENDF __udivdi3
- DEFUN __udivdi3_umoddi3
- ;; Save the callee-saved Registers used by C[] (R9:R8, Y)
- push C0
- push C1
- push C4
- push C5
- XCALL __udivmod64
- pop C5
- pop C4
- pop C1
- pop C0
- ret
- ENDF __udivdi3_umoddi3
- #endif /* L_udivdi3 */
- #if defined (L_udivmod64)
- ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
- ;; No Registers saved/restored; the Callers will take Care.
- ;; Preserves B[] and T-flag
- ;; T = 0: Compute Quotient in A[]
- ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
- DEFUN __udivmod64
- ;; Clear Remainder (C6, C7 will follow)
- clr C0
- clr C1
- wmov C2, C0
- wmov C4, C0
- ldi C7, 64
- #if SPEED_DIV == 0 || SPEED_DIV == 16
- ;; Initialize Loop-Counter
- mov R_cnt, C7
- wmov C6, C0
- #endif /* SPEED_DIV */
- #if SPEED_DIV == 8
- ;; Fast Path: pre-shift the Dividend in whole Bytes while it is
- ;; still smaller than the Divisor, reducing Iterations of Loop 3.
- push A7
- clr C6
- 1: ;; Compare shifted Dividend against Divisor
- ;; If -- even after Shifting -- it is smaller...
- CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
- cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
- brcc 2f
- ;; ...then we can subtract it. Thus, it is legal to shift left
- $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
- mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
- mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
- mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
- ;; 8 Bits are done
- subi C7, 8
- brne 1b
- ;; Shifted 64 Bits: A7 has traveled to C7
- pop C7
- ;; Divisor is greater than Dividend. We have:
- ;; A[] % B[] = A[]
- ;; A[] / B[] = 0
- ;; Thus, we can return immediately
- rjmp 5f
- 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
- mov R_cnt, C7
- ;; Push of A7 is not needed because C7 is still 0
- pop C7
- clr C7
- #elif SPEED_DIV == 16
- ;; Compare shifted Dividend against Divisor
- cp A7, B3
- cpc C0, B4
- cpc C1, B5
- cpc C2, B6
- cpc C3, B7
- brcc 2f
- ;; Divisor is greater than shifted Dividend: We can shift the Dividend
- ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
- wmov C2,A6 $ wmov C0,A4
- wmov A6,A2 $ wmov A4,A0
- wmov A2,C6 $ wmov A0,C4
- ;; Set Bit Counter to 32
- lsr R_cnt
- 2:
- #elif SPEED_DIV
- #error SPEED_DIV = ?
- #endif /* SPEED_DIV */
- ;; The very Division + Remainder Routine
- 3: ;; Left-shift Dividend...
- lsl A0 $ rol A1 $ rol A2 $ rol A3
- rol A4 $ rol A5 $ rol A6 $ rol A7
- ;; ...into Remainder
- rol C0 $ rol C1 $ rol C2 $ rol C3
- rol C4 $ rol C5 $ rol C6 $ rol C7
- ;; Compare Remainder and Divisor
- CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
- cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
- brcs 4f
- ;; Divisor fits into Remainder: Subtract it from Remainder...
- SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
- sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
- ;; ...and set according Bit in the upcoming Quotient
- ;; The Bit will travel to its final Position
- ori A0, 1
- 4: ;; This Bit is done
- dec R_cnt
- brne 3b
- ;; __zero_reg__ is 0 again
- ;; T = 0: We are fine with the Quotient in A[]
- ;; T = 1: Copy Remainder to A[]
- 5: brtc 6f
- wmov A0, C0
- wmov A2, C2
- wmov A4, C4
- wmov A6, C6
- ;; Move the Sign of the Result to SS.7
- lsl SS
- 6: ret
- ENDF __udivmod64
- #endif /* L_udivmod64 */
- #if defined (L_divdi3)
- ;; R25:R18 = R24:R18 mod R17:R10
- ;; Ordinary ABI-Function
- DEFUN __moddi3
- ;; T = 1: __udivmod64 returns the Remainder.
- set
- rjmp __divdi3_moddi3
- ENDF __moddi3
- ;; R25:R18 = R24:R18 div R17:R10
- ;; Ordinary ABI-Function
- DEFUN __divdi3
- ;; T = 0: __udivmod64 returns the Quotient; falls through.
- clt
- ENDF __divdi3
- DEFUN __divdi3_moddi3
- #if SPEED_DIV
- mov r31, A7
- or r31, B7
- brmi 0f
- ;; Both Signs are 0: the following Complexity is not needed
- XJMP __udivdi3_umoddi3
- #endif /* SPEED_DIV */
- 0: ;; The Prologue
- ;; Save 12 Registers: Y, 17...8
- ;; No Frame needed
- do_prologue_saves 12
- ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
- ;; SS.6 will contain the Sign of the Remainder (A.sign)
- mov SS, A7
- asr SS
- ;; Adjust Dividend's Sign as needed
- #if SPEED_DIV
- ;; Compiling for Speed we know that at least one Sign must be < 0
- ;; Thus, if A[] >= 0 then we know B[] < 0
- brpl 22f
- #else
- brpl 21f
- #endif /* SPEED_DIV */
- XCALL __negdi2
- ;; Adjust Divisor's Sign and SS.7 as needed
- 21: tst B7
- brpl 3f
- 22: ldi NN, 1 << 7
- eor SS, NN
- ;; Negate B[]: COM all Bytes but B0, NEG B0, then propagate
- ;; the Borrow with SBC against NN = -1.
- ldi NN, -1
- com B4 $ com B5 $ com B6 $ com B7
- $ com B1 $ com B2 $ com B3
- NEG B0
- $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
- sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
- 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
- XCALL __udivmod64
- ;; Adjust Result's Sign
- #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
- tst SS
- brpl 4f
- #else
- sbrc SS, 7
- #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
- XCALL __negdi2
- 4: ;; Epilogue: Restore 12 Registers and return
- do_epilogue_restores 12
- ENDF __divdi3_moddi3
- #endif /* L_divdi3 */
- #undef R_cnt
- #undef SS
- #undef NN
- .section .text.libgcc, "ax", @progbits
- #define TT __tmp_reg__
- #if defined (L_adddi3)
- ;; (set (reg:DI 18)
- ;; (plus:DI (reg:DI 18)
- ;; (reg:DI 10)))
- ;; Sets the V flag for signed overflow tests
- ;; Sets the C flag for unsigned overflow tests
- DEFUN __adddi3
- ;; 64-bit Addition: A[] += B[], Carry rippling from A0 to A7.
- ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
- adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
- ret
- ENDF __adddi3
- #endif /* L_adddi3 */
- #if defined (L_adddi3_s8)
- ;; (set (reg:DI 18)
- ;; (plus:DI (reg:DI 18)
- ;; (sign_extend:SI (reg:QI 26))))
- ;; Sets the V flag for signed overflow tests
- ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
- DEFUN __adddi3_s8
- ;; TT = 0x00 or 0xff: Sign-Extension Byte of R26.
- clr TT
- sbrc r26, 7
- com TT
- ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
- adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
- ret
- ENDF __adddi3_s8
- #endif /* L_adddi3_s8 */
- #if defined (L_subdi3)
- ;; (set (reg:DI 18)
- ;; (minus:DI (reg:DI 18)
- ;; (reg:DI 10)))
- ;; Sets the V flag for signed overflow tests
- ;; Sets the C flag for unsigned overflow tests
- DEFUN __subdi3
- ;; 64-bit Subtraction: A[] -= B[], Borrow rippling from A0 to A7.
- SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
- sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
- ret
- ENDF __subdi3
- #endif /* L_subdi3 */
- #if defined (L_cmpdi2)
- ;; (set (cc0)
- ;; (compare (reg:DI 18)
- ;; (reg:DI 10)))
- DEFUN __cmpdi2
- ;; 64-bit Comparison A[] - B[]: only the Flags are of interest.
- CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
- cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
- ret
- ENDF __cmpdi2
- #endif /* L_cmpdi2 */
- #if defined (L_cmpdi2_s8)
- ;; (set (cc0)
- ;; (compare (reg:DI 18)
- ;; (sign_extend:SI (reg:QI 26))))
- DEFUN __cmpdi2_s8
- ;; TT = 0x00 or 0xff: Sign-Extension Byte of R26.
- clr TT
- sbrc r26, 7
- com TT
- CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
- cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
- ret
- ENDF __cmpdi2_s8
- #endif /* L_cmpdi2_s8 */
- #if defined (L_negdi2)
- ;; (set (reg:DI 18)
- ;; (neg:DI (reg:DI 18)))
- ;; Sets the V flag for signed overflow tests
- DEFUN __negdi2
- ;; Negate A[]: COM all Bytes but A0, NEG A0, then propagate the
- ;; Borrow by subtracting -1 with Carry (SBCI reg,-1 == reg += C'... )
- com A4 $ com A5 $ com A6 $ com A7
- $ com A1 $ com A2 $ com A3
- NEG A0
- $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
- sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
- ret
- ENDF __negdi2
- #endif /* L_negdi2 */
- #undef TT
- #undef C7
- #undef C6
- #undef C5
- #undef C4
- #undef C3
- #undef C2
- #undef C1
- #undef C0
- #undef B7
- #undef B6
- #undef B5
- #undef B4
- #undef B3
- #undef B2
- #undef B1
- #undef B0
- #undef A7
- #undef A6
- #undef A5
- #undef A4
- #undef A3
- #undef A2
- #undef A1
- #undef A0
- #endif /* !defined (__AVR_TINY__) */
- .section .text.libgcc.prologue, "ax", @progbits
- /**********************************
- * This is a prologue subroutine
- **********************************/
- #if !defined (__AVR_TINY__)
- #if defined (L_prologue)
- ;; This function does not clobber T-flag; 64-bit division relies on it
- DEFUN __prologue_saves__
- ;; Pushes all call-saved Registers, then allocates the Frame:
- ;; SP -= R27:R26 (Frame Size), Y = new Frame Pointer.
- ;; Returns via indirect Jump (Z holds the Continuation Address).
- push r2
- push r3
- push r4
- push r5
- push r6
- push r7
- push r8
- push r9
- push r10
- push r11
- push r12
- push r13
- push r14
- push r15
- push r16
- push r17
- push r28
- push r29
- #if !defined (__AVR_HAVE_SPH__)
- ;; 8-bit SP: single OUT is atomic by itself
- in r28,__SP_L__
- sub r28,r26
- out __SP_L__,r28
- clr r29
- #elif defined (__AVR_XMEGA__)
- ;; XMEGA: writing SPL disables interrupts for 4 cycles, so the
- ;; two OUTs need no CLI/SEI bracket
- in r28,__SP_L__
- in r29,__SP_H__
- sub r28,r26
- sbc r29,r27
- out __SP_L__,r28
- out __SP_H__,r29
- #else
- ;; Classic AVR: update SPH/SPL with interrupts disabled so an IRQ
- ;; cannot observe a half-written Stack Pointer
- in r28,__SP_L__
- in r29,__SP_H__
- sub r28,r26
- sbc r29,r27
- in __tmp_reg__,__SREG__
- cli
- out __SP_H__,r29
- out __SREG__,__tmp_reg__
- out __SP_L__,r28
- #endif /* #SP = 8/16 */
- XIJMP
- ENDF __prologue_saves__
- #endif /* defined (L_prologue) */
- /*
- * This is an epilogue subroutine
- */
- #if defined (L_epilogue)
- DEFUN __epilogue_restores__
- ;; Counterpart of __prologue_saves__: reloads call-saved Registers
- ;; from the Frame via Y, deallocates the Frame (SP += R30) and
- ;; restores the caller's Y from the saved copy.
- ldd r2,Y+18
- ldd r3,Y+17
- ldd r4,Y+16
- ldd r5,Y+15
- ldd r6,Y+14
- ldd r7,Y+13
- ldd r8,Y+12
- ldd r9,Y+11
- ldd r10,Y+10
- ldd r11,Y+9
- ldd r12,Y+8
- ldd r13,Y+7
- ldd r14,Y+6
- ldd r15,Y+5
- ldd r16,Y+4
- ldd r17,Y+3
- ldd r26,Y+2
- #if !defined (__AVR_HAVE_SPH__)
- ldd r29,Y+1
- add r28,r30
- out __SP_L__,r28
- mov r28, r26
- #elif defined (__AVR_XMEGA__)
- ldd r27,Y+1
- add r28,r30
- adc r29,__zero_reg__
- out __SP_L__,r28
- out __SP_H__,r29
- wmov 28, 26
- #else
- ;; Classic AVR: SPH/SPL update bracketed by CLI/SREG restore so an
- ;; IRQ cannot observe a half-written Stack Pointer
- ldd r27,Y+1
- add r28,r30
- adc r29,__zero_reg__
- in __tmp_reg__,__SREG__
- cli
- out __SP_H__,r29
- out __SREG__,__tmp_reg__
- out __SP_L__,r28
- mov_l r28, r26
- mov_h r29, r27
- #endif /* #SP = 8/16 */
- ret
- ENDF __epilogue_restores__
- #endif /* defined (L_epilogue) */
- #endif /* !defined (__AVR_TINY__) */
- #ifdef L_exit
- .section .fini9,"ax",@progbits
- DEFUN _exit
- ;; Weak `exit` aliases `_exit`; both fall through into the .fini
- ;; Sections, ending at the infinite Loop in .fini0 below.
- .weak exit
- exit:
- ENDF _exit
- /* Code from .fini8 ... .fini1 sections inserted by ld script. */
- .section .fini0,"ax",@progbits
- cli
- __stop_program:
- rjmp __stop_program
- #endif /* defined (L_exit) */
- #ifdef L_cleanup
- .weak _cleanup
- .func _cleanup
- ;; Dummy stdio cleanup hook: default does nothing.
- _cleanup:
- ret
- .endfunc
- #endif /* defined (L_cleanup) */
- .section .text.libgcc, "ax", @progbits
- #ifdef L_tablejump2
- DEFUN __tablejump2__
- ;; Jump via a Word in a Flash Jump Table; Z (and R24 for > 128 KiB
- ;; Devices) holds the Table Entry's Word Address on entry.
- lsl r30
- rol r31
- #if defined (__AVR_HAVE_EIJMP_EICALL__)
- ;; Word address of gs() jumptable entry in R24:Z
- rol r24
- out __RAMPZ__, r24
- #elif defined (__AVR_HAVE_ELPM__)
- ;; Word address of jumptable entry in Z
- clr __tmp_reg__
- rol __tmp_reg__
- out __RAMPZ__, __tmp_reg__
- #endif
- ;; Read word address from jumptable and jump
- #if defined (__AVR_HAVE_ELPMX__)
- elpm __tmp_reg__, Z+
- elpm r31, Z
- mov r30, __tmp_reg__
- #ifdef __AVR_HAVE_RAMPD__
- ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
- out __RAMPZ__, __zero_reg__
- #endif /* RAMPD */
- XIJMP
- #elif defined (__AVR_HAVE_ELPM__)
- ;; No ELPM Z+: push the target address and "return" into it
- elpm
- push r0
- adiw r30, 1
- elpm
- push r0
- ret
- #elif defined (__AVR_HAVE_LPMX__)
- lpm __tmp_reg__, Z+
- lpm r31, Z
- mov r30, __tmp_reg__
- ijmp
- #elif defined (__AVR_TINY__)
- wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
- ld __tmp_reg__, Z+
- ld r31, Z ; Use ld instead of lpm to load Z
- mov r30, __tmp_reg__
- ijmp
- #else
- ;; Plain LPM only: push the target address and "return" into it
- lpm
- push r0
- adiw r30, 1
- lpm
- push r0
- ret
- #endif
- ENDF __tablejump2__
- #endif /* L_tablejump2 */
- #if defined(__AVR_TINY__)
- #ifdef L_copy_data
- .section .init4,"ax",@progbits
- .global __do_copy_data
- __do_copy_data:
- ;; AVR TINY: .data lives in the memory-mapped Flash Window, so a
- ;; plain LD via Z (offset by the PM Base Address) copies it to RAM.
- ldi r18, hi8(__data_end)
- ldi r26, lo8(__data_start)
- ldi r27, hi8(__data_start)
- ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
- ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
- rjmp .L__do_copy_data_start
- .L__do_copy_data_loop:
- ld r19, z+
- st X+, r19
- .L__do_copy_data_start:
- ;; Loop-Test first: handles an empty .data Section correctly
- cpi r26, lo8(__data_end)
- cpc r27, r18
- brne .L__do_copy_data_loop
- #endif
- #else
- #ifdef L_copy_data
- .section .init4,"ax",@progbits
- DEFUN __do_copy_data
- ;; Startup Code: copy initialized .data from Flash (load address)
- ;; to RAM. Variant chosen by the Device's Flash-Read Capability.
- #if defined(__AVR_HAVE_ELPMX__)
- ldi r17, hi8(__data_end)
- ldi r26, lo8(__data_start)
- ldi r27, hi8(__data_start)
- ldi r30, lo8(__data_load_start)
- ldi r31, hi8(__data_load_start)
- ldi r16, hh8(__data_load_start)
- out __RAMPZ__, r16
- rjmp .L__do_copy_data_start
- .L__do_copy_data_loop:
- elpm r0, Z+
- st X+, r0
- .L__do_copy_data_start:
- cpi r26, lo8(__data_end)
- cpc r27, r17
- brne .L__do_copy_data_loop
- #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
- ;; ELPM without Z+: bump RAMPZ manually whenever ADIW Z wraps
- ldi r17, hi8(__data_end)
- ldi r26, lo8(__data_start)
- ldi r27, hi8(__data_start)
- ldi r30, lo8(__data_load_start)
- ldi r31, hi8(__data_load_start)
- ldi r16, hh8(__data_load_start - 0x10000)
- .L__do_copy_data_carry:
- inc r16
- out __RAMPZ__, r16
- rjmp .L__do_copy_data_start
- .L__do_copy_data_loop:
- elpm
- st X+, r0
- adiw r30, 1
- brcs .L__do_copy_data_carry
- .L__do_copy_data_start:
- cpi r26, lo8(__data_end)
- cpc r27, r17
- brne .L__do_copy_data_loop
- #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
- ldi r17, hi8(__data_end)
- ldi r26, lo8(__data_start)
- ldi r27, hi8(__data_start)
- ldi r30, lo8(__data_load_start)
- ldi r31, hi8(__data_load_start)
- rjmp .L__do_copy_data_start
- .L__do_copy_data_loop:
- #if defined (__AVR_HAVE_LPMX__)
- lpm r0, Z+
- #else
- lpm
- adiw r30, 1
- #endif
- st X+, r0
- .L__do_copy_data_start:
- cpi r26, lo8(__data_end)
- cpc r27, r17
- brne .L__do_copy_data_loop
- #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
- #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
- ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
- out __RAMPZ__, __zero_reg__
- #endif /* ELPM && RAMPD */
- ENDF __do_copy_data
- #endif /* L_copy_data */
- #endif /* !defined (__AVR_TINY__) */
- /* __do_clear_bss is only necessary if there is anything in .bss section. */
- #ifdef L_clear_bss
- .section .init4,"ax",@progbits
- DEFUN __do_clear_bss
- ;; Startup Code: zero-fill .bss via X. Loop-Test first, so an
- ;; empty .bss Section is handled correctly.
- ldi r18, hi8(__bss_end)
- ldi r26, lo8(__bss_start)
- ldi r27, hi8(__bss_start)
- rjmp .do_clear_bss_start
- .do_clear_bss_loop:
- st X+, __zero_reg__
- .do_clear_bss_start:
- cpi r26, lo8(__bss_end)
- cpc r27, r18
- brne .do_clear_bss_loop
- ENDF __do_clear_bss
- #endif /* L_clear_bss */
- /* __do_global_ctors and __do_global_dtors are only necessary
- if there are any constructors/destructors. */
- #if defined(__AVR_TINY__)
- #define cdtors_tst_reg r18
- #else
- #define cdtors_tst_reg r17
- #endif
- #ifdef L_ctors
- .section .init6,"ax",@progbits
- DEFUN __do_global_ctors
- ;; Run constructors from __ctors_end DOWN to __ctors_start
- ;; (reverse order), dispatching each via __tablejump2__.
- ;; Y = word address of the current table entry.
- ldi cdtors_tst_reg, pm_hi8(__ctors_start)
- ldi r28, pm_lo8(__ctors_end)
- ldi r29, pm_hi8(__ctors_end)
- #ifdef __AVR_HAVE_EIJMP_EICALL__
- ldi r16, pm_hh8(__ctors_end)
- #endif /* HAVE_EIJMP */
- rjmp .L__do_global_ctors_start
- .L__do_global_ctors_loop:
- wsubi 28, 1
- #ifdef __AVR_HAVE_EIJMP_EICALL__
- sbc r16, __zero_reg__
- mov r24, r16
- #endif /* HAVE_EIJMP */
- mov_h r31, r29
- mov_l r30, r28
- XCALL __tablejump2__
- .L__do_global_ctors_start:
- cpi r28, pm_lo8(__ctors_start)
- cpc r29, cdtors_tst_reg
- #ifdef __AVR_HAVE_EIJMP_EICALL__
- ldi r24, pm_hh8(__ctors_start)
- cpc r16, r24
- #endif /* HAVE_EIJMP */
- brne .L__do_global_ctors_loop
- ENDF __do_global_ctors
- #endif /* L_ctors */
- #ifdef L_dtors
- .section .fini6,"ax",@progbits
- DEFUN __do_global_dtors
- ;; Run destructors from __dtors_start UP to __dtors_end (forward
- ;; order -- the mirror of __do_global_ctors), via __tablejump2__.
- ;; Y = word address of the current table entry.
- ldi cdtors_tst_reg, pm_hi8(__dtors_end)
- ldi r28, pm_lo8(__dtors_start)
- ldi r29, pm_hi8(__dtors_start)
- #ifdef __AVR_HAVE_EIJMP_EICALL__
- ldi r16, pm_hh8(__dtors_start)
- #endif /* HAVE_EIJMP */
- rjmp .L__do_global_dtors_start
- .L__do_global_dtors_loop:
- #ifdef __AVR_HAVE_EIJMP_EICALL__
- mov r24, r16
- #endif /* HAVE_EIJMP */
- mov_h r31, r29
- mov_l r30, r28
- XCALL __tablejump2__
- waddi 28, 1
- #ifdef __AVR_HAVE_EIJMP_EICALL__
- adc r16, __zero_reg__
- #endif /* HAVE_EIJMP */
- .L__do_global_dtors_start:
- cpi r28, pm_lo8(__dtors_end)
- cpc r29, cdtors_tst_reg
- #ifdef __AVR_HAVE_EIJMP_EICALL__
- ldi r24, pm_hh8(__dtors_end)
- cpc r16, r24
- #endif /* HAVE_EIJMP */
- brne .L__do_global_dtors_loop
- ENDF __do_global_dtors
- #endif /* L_dtors */
- #undef cdtors_tst_reg
- .section .text.libgcc, "ax", @progbits
- #if !defined (__AVR_TINY__)
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; Loading n bytes from Flash; n = 3,4
- ;; R22... = Flash[Z]
- ;; Clobbers: __tmp_reg__
- ;; Only needed on devices without LPMX (plain LPM leaves Z unchanged,
- ;; so Z must be advanced by hand and restored at the end).
- #if (defined (L_load_3) \
- || defined (L_load_4)) \
- && !defined (__AVR_HAVE_LPMX__)
- ;; Destination
- #define D0 22
- #define D1 D0+1
- #define D2 D0+2
- #define D3 D0+3
- ;; Load one byte from Flash[Z] into \dest and step Z forward,
- ;; except after the last byte, where Z is rewound to its entry value.
- .macro .load dest, n
- lpm
- mov \dest, r0
- .if \dest != D0+\n-1
- adiw r30, 1 ; not the last byte: advance Z
- .else
- sbiw r30, \n-1 ; last byte: restore Z to its original value
- .endif
- .endm
- #if defined (L_load_3)
- ;; 3-byte load implemented on top of __load_4; D3 is saved/restored
- ;; so only D0..D2 are written.
- DEFUN __load_3
- push D3
- XCALL __load_4
- pop D3
- ret
- ENDF __load_3
- #endif /* L_load_3 */
- #if defined (L_load_4)
- DEFUN __load_4
- .load D0, 4
- .load D1, 4
- .load D2, 4
- .load D3, 4
- ret
- ENDF __load_4
- #endif /* L_load_4 */
- #endif /* L_load_3 || L_load_4 */
- #endif /* !defined (__AVR_TINY__) */
- #if !defined (__AVR_TINY__)
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
- ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
- ;; Clobbers: __tmp_reg__, R21, R30, R31
- ;; R21.7 set means the 24-bit address is a RAM address; clear means
- ;; Flash, with R21 supplying address bits 16+ (loaded into RAMPZ where
- ;; the device has one).
- #if (defined (L_xload_1) \
- || defined (L_xload_2) \
- || defined (L_xload_3) \
- || defined (L_xload_4))
- ;; Destination
- #define D0 22
- #define D1 D0+1
- #define D2 D0+2
- #define D3 D0+3
- ;; Register containing bits 16+ of the address
- #define HHI8 21
- ;; Load one byte from Flash into \dest, using the best instruction the
- ;; device offers.  For the non-post-increment forms, Z (and RAMPZ via
- ;; HHI8, where present) is advanced by hand for all but the last byte.
- .macro .xload dest, n
- #if defined (__AVR_HAVE_ELPMX__)
- elpm \dest, Z+
- #elif defined (__AVR_HAVE_ELPM__)
- elpm
- mov \dest, r0
- .if \dest != D0+\n-1
- adiw r30, 1
- adc HHI8, __zero_reg__ ; carry 16-bit Z overflow into the RAMPZ byte
- out __RAMPZ__, HHI8
- .endif
- #elif defined (__AVR_HAVE_LPMX__)
- lpm \dest, Z+
- #else
- lpm
- mov \dest, r0
- .if \dest != D0+\n-1
- adiw r30, 1
- .endif
- #endif
- #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
- .if \dest == D0+\n-1
- ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
- out __RAMPZ__, __zero_reg__
- .endif
- #endif
- .endm ; .xload
- #if defined (L_xload_1)
- DEFUN __xload_1
- #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
- ;; Single-byte case on LPMX-only devices: a skip-instruction pair is
- ;; shorter than the generic branch sequence below.
- sbrc HHI8, 7
- ld D0, Z ; RAM source
- sbrs HHI8, 7
- lpm D0, Z ; Flash source
- ret
- #else
- sbrc HHI8, 7
- rjmp 1f ; HHI8.7 set: read from RAM
- #if defined (__AVR_HAVE_ELPM__)
- out __RAMPZ__, HHI8
- #endif /* __AVR_HAVE_ELPM__ */
- .xload D0, 1
- ret
- 1: ld D0, Z
- ret
- #endif /* LPMx && ! ELPM */
- ENDF __xload_1
- #endif /* L_xload_1 */
- #if defined (L_xload_2)
- DEFUN __xload_2
- sbrc HHI8, 7
- rjmp 1f ; HHI8.7 set: read from RAM
- #if defined (__AVR_HAVE_ELPM__)
- out __RAMPZ__, HHI8
- #endif /* __AVR_HAVE_ELPM__ */
- .xload D0, 2
- .xload D1, 2
- ret
- 1: ld D0, Z+
- ld D1, Z+
- ret
- ENDF __xload_2
- #endif /* L_xload_2 */
- #if defined (L_xload_3)
- DEFUN __xload_3
- sbrc HHI8, 7
- rjmp 1f ; HHI8.7 set: read from RAM
- #if defined (__AVR_HAVE_ELPM__)
- out __RAMPZ__, HHI8
- #endif /* __AVR_HAVE_ELPM__ */
- .xload D0, 3
- .xload D1, 3
- .xload D2, 3
- ret
- 1: ld D0, Z+
- ld D1, Z+
- ld D2, Z+
- ret
- ENDF __xload_3
- #endif /* L_xload_3 */
- #if defined (L_xload_4)
- DEFUN __xload_4
- sbrc HHI8, 7
- rjmp 1f ; HHI8.7 set: read from RAM
- #if defined (__AVR_HAVE_ELPM__)
- out __RAMPZ__, HHI8
- #endif /* __AVR_HAVE_ELPM__ */
- .xload D0, 4
- .xload D1, 4
- .xload D2, 4
- .xload D3, 4
- ret
- 1: ld D0, Z+
- ld D1, Z+
- ld D2, Z+
- ld D3, Z+
- ret
- ENDF __xload_4
- #endif /* L_xload_4 */
- #endif /* L_xload_{1|2|3|4} */
- #endif /* if !defined (__AVR_TINY__) */
- #if !defined (__AVR_TINY__)
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; memcopy from Address Space __pgmx to RAM
- ;; R23:Z = Source Address
- ;; X = Destination Address
- ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
- ;; R23.7 selects the source space: set = RAM, clear = Flash
- ;; (R23 then supplies address bits 16+, mirrored into RAMPZ).
- #if defined (L_movmemx)
- #define HHI8 23
- #define LOOP 24
- ;; 8-bit-count entry point: zero-extends the count into LOOP+1 and
- ;; falls through into __movmemx_hi.
- DEFUN __movmemx_qi
- ;; #Bytes to copy fit in 8 Bits (1..255)
- ;; Zero-extend Loop Counter
- clr LOOP+1
- ;; FALLTHRU
- ENDF __movmemx_qi
- ;; 16-bit-count entry point: copies LOOP+1:LOOP bytes from R23:Z to X.
- DEFUN __movmemx_hi
- ;; Read from where?
- sbrc HHI8, 7
- rjmp 1f
- ;; Read from Flash
- #if defined (__AVR_HAVE_ELPM__)
- out __RAMPZ__, HHI8
- #endif
- 0: ;; Load 1 Byte from Flash...
- #if defined (__AVR_HAVE_ELPMX__)
- elpm r0, Z+
- #elif defined (__AVR_HAVE_ELPM__)
- elpm
- adiw r30, 1
- adc HHI8, __zero_reg__ ; carry 16-bit Z overflow into the RAMPZ byte
- out __RAMPZ__, HHI8
- #elif defined (__AVR_HAVE_LPMX__)
- lpm r0, Z+
- #else
- lpm
- adiw r30, 1
- #endif
- ;; ...and store that Byte to RAM Destination
- st X+, r0
- sbiw LOOP, 1
- brne 0b
- #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
- ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
- out __RAMPZ__, __zero_reg__
- #endif /* ELPM && RAMPD */
- ret
- ;; Read from RAM
- 1: ;; Read 1 Byte from RAM...
- ld r0, Z+
- ;; and store that Byte to RAM Destination
- st X+, r0
- sbiw LOOP, 1
- brne 1b
- ret
- ENDF __movmemx_hi
- #undef HHI8
- #undef LOOP
- #endif /* L_movmemx */
- #endif /* !defined (__AVR_TINY__) */
- .section .text.libgcc.builtins, "ax", @progbits
- /**********************************
- * Find first set Bit (ffs)
- **********************************/
- #if defined (L_ffssi2)
- ;; find first set bit
- ;; r25:r24 = ffs32 (r25:r22)
- ;; clobbers: r22, r26
- ;; r26 accumulates 8 per all-zero low byte, then __loop_ffsqi2 adds the
- ;; 1-based bit position within the first non-zero byte.
- ;; ffs(0) returns 0 (r25:r24 are the zero top bytes of the input).
- DEFUN __ffssi2
- clr r26
- tst r22
- brne 1f
- subi r26, -8 ; r26 += 8
- or r22, r23
- brne 1f
- subi r26, -8 ; r26 += 8
- or r22, r24
- brne 1f
- subi r26, -8 ; r26 += 8
- or r22, r25
- brne 1f
- ret ; all four bytes zero: return 0
- 1: mov r24, r22 ; r22 holds the first non-zero byte
- XJMP __loop_ffsqi2
- ENDF __ffssi2
- #endif /* defined (L_ffssi2) */
- #if defined (L_ffshi2)
- ;; find first set bit
- ;; r25:r24 = ffs16 (r25:r24)
- ;; clobbers: r26
- ;; ffs(0) returns 0 (both input bytes are zero on that path).
- DEFUN __ffshi2
- clr r26
- #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
- ;; Some cores have problem skipping 2-word instruction
- tst r24
- breq 2f
- #else
- cpse r24, __zero_reg__ ; low byte non-zero: skip to the bit loop
- #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
- 1: XJMP __loop_ffsqi2
- 2: ldi r26, 8 ; low byte was zero: bit position starts at 8
- or r24, r25
- brne 1b
- ret ; both bytes zero: return 0
- ENDF __ffshi2
- #endif /* defined (L_ffshi2) */
- #if defined (L_loop_ffsqi2)
- ;; Helper for ffshi2, ffssi2
- ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
- ;; r24 must be != 0
- ;; clobbers: r26
- ;; Shifts r24 right, counting in r26, until a 1 bit drops into carry;
- ;; the count is then the 1-based position of the lowest set bit.
- DEFUN __loop_ffsqi2
- inc r26
- lsr r24
- brcc __loop_ffsqi2
- mov r24, r26
- clr r25
- ret
- ENDF __loop_ffsqi2
- #endif /* defined (L_loop_ffsqi2) */
- /**********************************
- * Count trailing Zeros (ctz)
- **********************************/
- #if defined (L_ctzsi2)
- ;; count trailing zeros
- ;; r25:r24 = ctz32 (r25:r22)
- ;; clobbers: r26, r22
- ;; ctz(0) = 255
- ;; Note that ctz(0) is undefined for GCC
- ;; ctz(x) = ffs(x) - 1; ffs(0) = 0 decrements to 255 on that path.
- DEFUN __ctzsi2
- XCALL __ffssi2
- dec r24
- ret
- ENDF __ctzsi2
- #endif /* defined (L_ctzsi2) */
- #if defined (L_ctzhi2)
- ;; count trailing zeros
- ;; r25:r24 = ctz16 (r25:r24)
- ;; clobbers: r26
- ;; ctz(0) = 255
- ;; Note that ctz(0) is undefined for GCC
- ;; ctz(x) = ffs(x) - 1; ffs(0) = 0 decrements to 255 on that path.
- DEFUN __ctzhi2
- XCALL __ffshi2
- dec r24
- ret
- ENDF __ctzhi2
- #endif /* defined (L_ctzhi2) */
- /**********************************
- * Count leading Zeros (clz)
- **********************************/
- #if defined (L_clzdi2)
- ;; count leading zeros
- ;; r25:r24 = clz64 (r25:r18)
- ;; clobbers: r22, r23, r26
- ;; clz of the high 32 bits; only if that is 32 (bit 5 set) is the
- ;; low half examined, adding 32 to its count.
- DEFUN __clzdi2
- XCALL __clzsi2
- sbrs r24, 5 ; high word not all-zero: its count is the answer
- ret
- mov_l r22, r18
- mov_h r23, r19
- mov_l r24, r20
- mov_h r25, r21
- XCALL __clzsi2
- subi r24, -32 ; r24 += 32
- ret
- ENDF __clzdi2
- #endif /* defined (L_clzdi2) */
- #if defined (L_clzsi2)
- ;; count leading zeros
- ;; r25:r24 = clz32 (r25:r22)
- ;; clobbers: r26
- ;; clz of the high 16 bits; only if that is 16 (bit 4 set) is the
- ;; low half examined, adding 16 to its count.
- DEFUN __clzsi2
- XCALL __clzhi2
- sbrs r24, 4 ; high half not all-zero: its count is the answer
- ret
- mov_l r24, r22
- mov_h r25, r23
- XCALL __clzhi2
- subi r24, -16 ; r24 += 16
- ret
- ENDF __clzsi2
- #endif /* defined (L_clzsi2) */
- #if defined (L_clzhi2)
- ;; count leading zeros
- ;; r25:r24 = clz16 (r25:r24)
- ;; clobbers: r26
- ;; r26 counts: +8 if the high byte is zero, +3 and a nibble swap if the
- ;; remaining byte is < 16, then one shift-left per leading zero bit.
- DEFUN __clzhi2
- clr r26
- tst r25
- brne 1f
- subi r26, -8 ; high byte zero: r26 += 8, continue with low byte
- or r25, r24
- brne 1f
- ldi r24, 16 ; whole input zero: clz16(0) = 16
- ret
- 1: cpi r25, 16
- brsh 3f
- subi r26, -3 ; top nibble zero: skip 3 of its 4 bits...
- swap r25 ; ...and move the low nibble up for the bit loop
- 2: inc r26
- 3: lsl r25
- brcc 2b ; keep shifting until the leading 1 falls into carry
- mov r24, r26
- clr r25
- ret
- ENDF __clzhi2
- #endif /* defined (L_clzhi2) */
- /**********************************
- * Parity
- **********************************/
- #if defined (L_paritydi2)
- ;; r25:r24 = parity64 (r25:r18)
- ;; clobbers: __tmp_reg__
- ;; XOR-fold the low four bytes into r24, then tail-call the 32-bit
- ;; routine on the remaining r25:r22 (parity is XOR-associative).
- DEFUN __paritydi2
- eor r24, r18
- eor r24, r19
- eor r24, r20
- eor r24, r21
- XJMP __paritysi2
- ENDF __paritydi2
- #endif /* defined (L_paritydi2) */
- #if defined (L_paritysi2)
- ;; r25:r24 = parity32 (r25:r22)
- ;; clobbers: __tmp_reg__
- ;; XOR-fold the low two bytes into r24, then tail-call the 16-bit case.
- DEFUN __paritysi2
- eor r24, r22
- eor r24, r23
- XJMP __parityhi2
- ENDF __paritysi2
- #endif /* defined (L_paritysi2) */
- #if defined (L_parityhi2)
- ;; r25:r24 = parity16 (r25:r24)
- ;; clobbers: __tmp_reg__
- DEFUN __parityhi2
- eor r24, r25 ; fold high byte into low, then fall through
- ;; FALLTHRU
- ENDF __parityhi2
- ;; r25:r24 = parity8 (r24)
- ;; clobbers: __tmp_reg__
- DEFUN __parityqi2
- ;; parity is in r24[0..7]
- mov __tmp_reg__, r24
- swap __tmp_reg__
- eor r24, __tmp_reg__ ; fold nibbles: parity now in r24[0..3]
- ;; parity is in r24[0..3]
- ;; Fold 4 bits to bits 0 and 3 with an add/mask/add trick.
- subi r24, -4
- andi r24, -5
- subi r24, -6
- ;; parity is in r24[0,3]
- sbrc r24, 3
- inc r24 ; merge bit 3 into bit 0
- ;; parity is in r24[0]
- andi r24, 1
- clr r25
- ret
- ENDF __parityqi2
- #endif /* defined (L_parityhi2) */
- /**********************************
- * Population Count
- **********************************/
- #if defined (L_popcounthi2)
- ;; population count
- ;; r25:r24 = popcount16 (r25:r24)
- ;; clobbers: __tmp_reg__
- ;; popcount8 of each byte; the low-byte count is pushed and added back
- ;; in the shared _tail helper (also tail-called by the 32/64-bit cases).
- DEFUN __popcounthi2
- XCALL __popcountqi2
- push r24 ; save count of the low byte
- mov r24, r25
- XCALL __popcountqi2
- clr r25
- ;; FALLTHRU
- ENDF __popcounthi2
- ;; Pops a saved partial count and adds it to r24.
- DEFUN __popcounthi2_tail
- pop __tmp_reg__
- add r24, __tmp_reg__
- ret
- ENDF __popcounthi2_tail
- #endif /* defined (L_popcounthi2) */
- #if defined (L_popcountsi2)
- ;; population count
- ;; r25:r24 = popcount32 (r25:r22)
- ;; clobbers: __tmp_reg__
- ;; popcount16 of each half; the high-half count is pushed and the
- ;; shared _tail helper adds it to the low-half count.
- DEFUN __popcountsi2
- XCALL __popcounthi2
- push r24
- mov_l r24, r22
- mov_h r25, r23
- XCALL __popcounthi2
- XJMP __popcounthi2_tail
- ENDF __popcountsi2
- #endif /* defined (L_popcountsi2) */
- #if defined (L_popcountdi2)
- ;; population count
- ;; r25:r24 = popcount64 (r25:r18)
- ;; clobbers: r22, r23, __tmp_reg__
- ;; popcount32 of each half; the high-half count is pushed and the
- ;; shared _tail helper adds it to the low-half count.
- DEFUN __popcountdi2
- XCALL __popcountsi2
- push r24
- mov_l r22, r18
- mov_h r23, r19
- mov_l r24, r20
- mov_h r25, r21
- XCALL __popcountsi2
- XJMP __popcounthi2_tail
- ENDF __popcountdi2
- #endif /* defined (L_popcountdi2) */
- #if defined (L_popcountqi2)
- ;; population count
- ;; r24 = popcount8 (r24)
- ;; clobbers: __tmp_reg__
- ;; Keep bit 0 as the initial count, then shift the copy right and add
- ;; each bit via the carry flag (adc).  The final adc adds both the
- ;; last shifted-out bit and the now-0-or-1 remaining value.
- DEFUN __popcountqi2
- mov __tmp_reg__, r24
- andi r24, 1 ; r24 = bit 0 of the input
- lsr __tmp_reg__
- lsr __tmp_reg__
- adc r24, __zero_reg__ ; += bit 1
- lsr __tmp_reg__
- adc r24, __zero_reg__ ; += bit 2
- lsr __tmp_reg__
- adc r24, __zero_reg__ ; += bit 3
- lsr __tmp_reg__
- adc r24, __zero_reg__ ; += bit 4
- lsr __tmp_reg__
- adc r24, __zero_reg__ ; += bit 5
- lsr __tmp_reg__
- adc r24, __tmp_reg__ ; += bit 6 (carry) and bit 7 (remaining value)
- ret
- ENDF __popcountqi2
- #endif /* defined (L_popcountqi2) */
- /**********************************
- * Swap bytes
- **********************************/
- ;; swap two registers with different register number
- ;; (classic 3-XOR swap; needs no temporary, but \a and \b must differ)
- .macro bswap a, b
- eor \a, \b
- eor \b, \a
- eor \a, \b
- .endm
- #if defined (L_bswapsi2)
- ;; swap bytes
- ;; r25:r22 = bswap32 (r25:r22)
- ;; Exchange the outer byte pair and the inner byte pair.
- DEFUN __bswapsi2
- bswap r22, r25
- bswap r23, r24
- ret
- ENDF __bswapsi2
- #endif /* defined (L_bswapsi2) */
- #if defined (L_bswapdi2)
- ;; swap bytes
- ;; r25:r18 = bswap64 (r25:r18)
- ;; Exchange the four byte pairs mirrored around the middle.
- DEFUN __bswapdi2
- bswap r18, r25
- bswap r19, r24
- bswap r20, r23
- bswap r21, r22
- ret
- ENDF __bswapdi2
- #endif /* defined (L_bswapdi2) */
- /**********************************
- * 64-bit shifts
- **********************************/
- #if defined (L_ashrdi3)
- ;; SS holds the byte shifted in from the top: 0x00 for logical /
- ;; non-negative arithmetic shifts, 0xFF for negative arithmetic shifts.
- ;; __zero_reg__ is borrowed for this and cleared again before return.
- #define SS __zero_reg__
- ;; Arithmetic shift right
- ;; r25:r18 = ashr64 (r25:r18, r17:r16)
- DEFUN __ashrdi3
- sbrc r25, 7
- com SS ; negative input: SS = 0xFF, then share the lshr code
- ;; FALLTHRU
- ENDF __ashrdi3
- ;; Logic shift right
- ;; r25:r18 = lshr64 (r25:r18, r17:r16)
- DEFUN __lshrdi3
- ;; Signs are in SS (zero_reg)
- mov __tmp_reg__, r16 ; preserve shift count for the caller
- 0: cpi r16, 8
- brlo 2f
- subi r16, 8
- ;; Shift a whole byte: move every byte down one position and
- ;; replicate the sign byte at the top.
- mov r18, r19
- mov r19, r20
- mov r20, r21
- mov r21, r22
- mov r22, r23
- mov r23, r24
- mov r24, r25
- mov r25, SS
- rjmp 0b
- ;; Remaining 0..7 single-bit shifts.
- 1: asr SS ; keeps SS intact, sets carry-in = sign bit
- ror r25
- ror r24
- ror r23
- ror r22
- ror r21
- ror r20
- ror r19
- ror r18
- 2: dec r16
- brpl 1b
- clr __zero_reg__ ; restore the zero-register invariant
- mov r16, __tmp_reg__ ; restore shift count
- ret
- ENDF __lshrdi3
- #undef SS
- #endif /* defined (L_ashrdi3) */
- #if defined (L_ashldi3)
- ;; Shift left
- ;; r25:r18 = ashl64 (r25:r18, r17:r16)
- ;; This function does not clobber T.
- ;; Whole-byte moves first (8 positions per iteration), then 0..7
- ;; single-bit lsl/rol steps.  r16 is preserved via __tmp_reg__.
- DEFUN __ashldi3
- mov __tmp_reg__, r16 ; preserve shift count for the caller
- 0: cpi r16, 8
- brlo 2f
- ;; Shift a whole byte: move every byte up one position, zero the bottom.
- mov r25, r24
- mov r24, r23
- mov r23, r22
- mov r22, r21
- mov r21, r20
- mov r20, r19
- mov r19, r18
- clr r18
- subi r16, 8
- rjmp 0b
- ;; Remaining 0..7 single-bit shifts.
- 1: lsl r18
- rol r19
- rol r20
- rol r21
- rol r22
- rol r23
- rol r24
- rol r25
- 2: dec r16
- brpl 1b
- mov r16, __tmp_reg__ ; restore shift count
- ret
- ENDF __ashldi3
- #endif /* defined (L_ashldi3) */
- #if defined (L_rotldi3)
- ;; Rotate left
- ;; r25:r18 = rotl64 (r25:r18, r17:r16)
- ;; Whole-byte rotations first (8 positions per iteration), then 0..7
- ;; single-bit steps where the bit shifted out of r25 re-enters r18
- ;; through the carry (adc).  r16 is preserved via push/pop.
- DEFUN __rotldi3
- push r16 ; preserve rotate count for the caller
- 0: cpi r16, 8
- brlo 2f
- subi r16, 8
- ;; Rotate by a whole byte: r25 wraps around into r18.
- mov __tmp_reg__, r25
- mov r25, r24
- mov r24, r23
- mov r23, r22
- mov r22, r21
- mov r21, r20
- mov r20, r19
- mov r19, r18
- mov r18, __tmp_reg__
- rjmp 0b
- ;; Remaining 0..7 single-bit rotations.
- 1: lsl r18
- rol r19
- rol r20
- rol r21
- rol r22
- rol r23
- rol r24
- rol r25
- adc r18, __zero_reg__ ; wrap the bit shifted out of r25 into bit 0
- 2: dec r16
- brpl 1b
- pop r16 ; restore rotate count
- ret
- ENDF __rotldi3
- #endif /* defined (L_rotldi3) */
- .section .text.libgcc.fmul, "ax", @progbits
- /***********************************************************/
- ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
- ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
- /***********************************************************/
- ;; Register conventions shared by __fmul, __fmuls, __fmulsu:
- ;; A1, B1 are the 1.7 fixed-point inputs; C1:C0 is the 1.15 result;
- ;; A0 doubles as the "negate result?" flag (bit 7) for the signed cases.
- #define A1 24
- #define B1 25
- #define C0 22
- #define C1 23
- #define A0 __tmp_reg__
- #ifdef L_fmuls
- ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
- ;;; Clobbers: r24, r25, __tmp_reg__
- ;;; Signed x signed: the result must be negated iff the input signs
- ;;; differ, so A0.7 = A1.7 ^ B1.7; B is made non-negative here, A is
- ;;; handled in the shared __fmulsu_exit tail.
- DEFUN __fmuls
- ;; A0.7 = negate result?
- mov A0, A1
- eor A0, B1
- ;; B1 = |B1|
- sbrc B1, 7
- neg B1
- XJMP __fmulsu_exit
- ENDF __fmuls
- #endif /* L_fmuls */
- #ifdef L_fmulsu
- ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
- ;;; Clobbers: r24, r25, __tmp_reg__
- ;;; Signed x unsigned: only A's sign decides whether to negate.
- DEFUN __fmulsu
- ;; A0.7 = negate result?
- mov A0, A1
- ;; FALLTHRU
- ENDF __fmulsu
- ;; Helper for __fmuls and __fmulsu
- ;; Takes |A|, runs the unsigned __fmul, and negates the 16-bit result
- ;; iff A0.7 is set.
- DEFUN __fmulsu_exit
- ;; A1 = |A1|
- sbrc A1, 7
- neg A1
- #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
- ;; Some cores have problem skipping 2-word instruction
- tst A0
- brmi 1f
- #else
- sbrs A0, 7
- #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
- XJMP __fmul ; no negation needed: tail-call
- 1: XCALL __fmul
- ;; C = -C iff A0.7 = 1
- NEG2 C0
- ret
- ENDF __fmulsu_exit
- #endif /* L_fmulsu */
- #ifdef L_fmul
- ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
- ;;; Clobbers: r24, r25, __tmp_reg__
- ;;; Unsigned 1.7 x 1.7 -> 1.15 shift-and-add multiply: A1:A0 is shifted
- ;;; right while B1 is shifted left; whenever B's MSB is set, A is added
- ;;; to the accumulator C.  The loop ends when B1 becomes zero.
- DEFUN __fmul
- ; clear result
- clr C0
- clr C1
- clr A0
- 1: tst B1 ; set N/Z for the first brpl test below
- ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
- 2: brpl 3f
- ;; C += A
- add C0, A0
- adc C1, A1
- 3: ;; A >>= 1
- lsr A1
- ror A0
- ;; B <<= 1
- lsl B1
- brne 2b ; loop while bits remain in B (lsl set N for brpl)
- ret
- ENDF __fmul
- #endif /* L_fmul */
- #undef A0
- #undef A1
- #undef B1
- #undef C0
- #undef C1
- #include "lib1funcs-fixed.S"
|