;; Copyright (C) 2019-2022 Free Software Foundation, Inc.
;;
;; This file is part of LIBF7, which is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; Under Section 7 of GPL version 3, you are granted additional
;; permissions described in the GCC Runtime Library Exception, version
;; 3.1, as published by the Free Software Foundation.
;;
;; You should have received a copy of the GNU General Public License and
;; a copy of the GCC Runtime Library Exception along with this program;
;; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
;; <http://www.gnu.org/licenses/>. */

#ifndef __AVR_TINY__

#define ASM_DEFS_HAVE_DEFUN

#include "asm-defs.h"
#include "libf7.h"

#define ZERO __zero_reg__
#define TMP __tmp_reg__

#define F7(name) F7_(name##_asm)

.macro F7call name
	.global F7(\name\())
	XCALL F7(\name\())
.endm

.macro F7jmp name
	.global F7(\name\())
	XJMP F7(\name\())
.endm

;; Just for visibility in disassembly.
.macro LLL name
	.global LLL.\name
	LLL.\name:
	nop
.endm

.macro DEFUN name
	.section .text.libf7.asm.\name, "ax", @progbits
	.global F7(\name\())
	.func F7(\name\())
	F7(\name\()) :
.endm

.macro ENDF name
	.size F7(\name\()), . - F7(\name\())
	.endfunc
.endm

.macro LABEL name
	.global F7(\name\())
	F7(\name\()) :
.endm

.macro _DEFUN name
	.section .text.libf7.asm.\name, "ax", @progbits
	.weak \name
	.type \name, @function
	\name :
.endm

.macro _ENDF name
	.size \name, . - \name
.endm

.macro _LABEL name
	.weak \name
	.type \name, @function
	\name :
.endm

#define F7_NAME(X) F7_(X)

;; Make a weak alias.
.macro ALIAS sym
	.weak \sym
	.type \sym, @function
	\sym:
.endm

;; Make a weak alias if double is 64 bits wide.
.macro DALIAS sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_DOUBLE__ == 8
ALIAS \sym
#endif
.endm

;; Make a weak alias if long double is 64 bits wide.
.macro LALIAS sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_LONG_DOUBLE__ == 8
ALIAS \sym
#endif
.endm

#define Off 1
#define Expo (Off + F7_MANT_BYTES)

#ifdef F7MOD_classify_
;; r24 = classify (*Z)
;; NaN -> F7_FLAG_nan
;; INF -> F7_FLAG_inf [ | F7_FLAG_sign ]
;; ==0 -> F7_FLAG_zero
;; ... -> 0 [ | F7_FLAG_sign ]

;; Clobbers: None (no TMP, no T).
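;;
;; Roughly the following C, as a sketch only (f7_t field names as in libf7.h):
;;
;;   uint8_t classify (const f7_t *z)
;;   {
;;       if (z->flags >> 1)                    // Any flag beside the sign bit?
;;           return z->flags;                  // NaN or +-Inf.
;;       if (! (z->mant[F7_MANT_BYTES - 1] & 0x80))
;;           return F7_FLAG_zero;              // Mantissa not normalized => 0.
;;       return z->flags & F7_FLAG_sign;       // Ordinary non-zero number.
;;   }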
DEFUN classify

	ld r24, Z
	lsr r24
	brne .Lnan_or_inf

	ldd r24, Z+6+Off
	tst r24
	brpl 0f
	sbc r24, r24
	andi r24, F7_FLAG_sign
	ret

0:	ldi r24, F7_FLAG_zero
	ret

.Lnan_or_inf:
	rol r24
	ret

ENDF classify
#endif /* F7MOD_classify_ */

#ifdef F7MOD_clr_
DEFUN clr
	std Z+0, ZERO
	std Z+0+Off, ZERO
	std Z+1+Off, ZERO
	std Z+2+Off, ZERO
	std Z+3+Off, ZERO
	std Z+4+Off, ZERO
	std Z+5+Off, ZERO
	std Z+6+Off, ZERO
	std Z+0+Expo, ZERO
	std Z+1+Expo, ZERO
	ret
ENDF clr

#endif /* F7MOD_clr_ */

#ifdef F7MOD_clz_
;; The libgcc CLZ implementations like __clzsi2 aka. __builtin_clzl are
;; not very well suited for our purpose, so implement our own.

#define ZBITS r26
.macro .test.byte reg
	or ZERO, \reg
	brne .Loop_bit
	subi ZBITS, -8
.endm

;; R26 = CLZ (uint64_t R18); CLZ (0) = 64.
;; Unchanged: T
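;;
;; In C terms this computes (a sketch, host-side model only):
;;
;;   uint8_t clz64 (uint64_t x)
;;   {
;;       uint8_t n = 0;
;;       while (n < 64 && ! (x & (1ull << 63)))
;;           ++n, x <<= 1;
;;       return n;        // CLZ (0) = 64.
;;   }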
DEFUN clzdi2
	clr ZBITS
	;; Catch the common case of normalized .mant for speed-up.
	tst r25
	brmi 9f
	.test.byte r25
	.test.byte r24
	.test.byte r23
	.test.byte r22
	.test.byte r21
	.test.byte r20
	.test.byte r19
	.test.byte r18
.Ldone:
	clr ZERO
9:	ret

.Loop_bit:
	lsl ZERO
	brcs .Ldone
	inc ZBITS
	rjmp .Loop_bit

ENDF clzdi2
#undef ZBITS
#endif /* F7MOD_clz_ */

#ifdef F7MOD_cmp_mant_
DEFUN cmp_mant

	adiw X, 6 + Off
	ld r24, X $ ldd TMP, Z+6+Off $ SUB r24, TMP
	brne .Lunequal

	sbiw X, 6
	ld r24, X+ $ ldd TMP, Z+0+Off $ SUB r24, TMP
	ld r24, X+ $ ldd TMP, Z+1+Off $ sbc r24, TMP
	ld r24, X+ $ ldd TMP, Z+2+Off $ sbc r24, TMP
	ld r24, X+ $ ldd TMP, Z+3+Off $ sbc r24, TMP
	ld r24, X+ $ ldd TMP, Z+4+Off $ sbc r24, TMP
	ld r24, X+ $ ldd TMP, Z+5+Off $ sbc r24, TMP
	;; MSBs are already known to be equal
	breq 9f
.Lunequal:
	sbc r24, r24
	sbci r24, -1
9:	sbiw X, 6 + Off
	ret
ENDF cmp_mant
#endif /* F7MOD_cmp_mant_ */

#define CA 18
#define C0 CA+1
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define C4 C0+4
#define C5 C0+5
#define C6 C0+6
#define Carry r16
#define Flags 18

#ifdef F7MOD_store_
;; Z->flags = CA.
;; Z->mant = C[7].
DEFUN store_mant.with_flags
	st Z, CA

;; Z->mant = C[7].
LABEL store_mant
	std Z+0+Off, C0
	std Z+1+Off, C1
	std Z+2+Off, C2
	std Z+3+Off, C3
	std Z+4+Off, C4
	std Z+5+Off, C5
	std Z+6+Off, C6
	ret
ENDF store_mant.with_flags
#endif /* F7MOD_store_ */

#ifdef F7MOD_load_
;; CA = Z->flags
;; C[7] = Z->mant
DEFUN load_mant.with_flags
	ld CA, Z
	skipnext

;; CA = 0
;; C[7] = Z->mant
LABEL load_mant.clr_CA
LABEL load_mant.clr_flags
	clr CA ; May be skipped

;; C[7] = Z->mant
LABEL load_mant
	ldd C0, Z+0+Off
	ldd C1, Z+1+Off
	ldd C2, Z+2+Off
	ldd C3, Z+3+Off
	ldd C4, Z+4+Off
	ldd C5, Z+5+Off
	ldd C6, Z+6+Off
	ret
ENDF load_mant.with_flags
#endif /* F7MOD_load_ */

#ifdef F7MOD_copy_
DEFUN copy
	cp XL, ZL
	cpc XH, ZH
	breq 9f
	adiw XL, 10
	adiw ZL, 10
	set
	bld ZERO, 1
	bld ZERO, 3 ; ZERO = 0b1010 = 10.
.Loop:
	ld TMP, -X
	st -Z, TMP
	dec ZERO
	brne .Loop
9:	ret
ENDF copy
#endif /* F7MOD_copy_ */

#ifdef F7MOD_copy_P_
DEFUN copy_P
	set
	bld ZERO, 1
	bld ZERO, 3 ; ZERO = 0b1010 = 10.
.Loop:
#ifdef __AVR_HAVE_LPMX__
	lpm TMP, Z+
#else
	lpm
	adiw Z, 1
#endif /* Have LPMx */
	st X+, TMP
	dec ZERO
	brne .Loop
	sbiw X, 10
	sbiw Z, 10
	ret
ENDF copy_P
#endif /* F7MOD_copy_P_ */

#ifdef F7MOD_copy_mant_
DEFUN copy_mant
	cp XL, ZL
	cpc XH, ZH
	breq 9f
	adiw XL, 1
	adiw ZL, 1
	set
	bld ZERO, 3
	dec ZERO ; ZERO = 7
.Loop:
	ld TMP, X+
	st Z+, TMP
	dec ZERO
	brne .Loop
	sbiw XL, 8
	sbiw ZL, 8
9:	ret
ENDF copy_mant
#endif /* F7MOD_copy_mant_ */


#ifdef F7MOD_clr_mant_lsbs_
DEFUN clr_mant_lsbs
	push r16
	mov r16, r20
	wmov XL, r24

	wmov ZL, r22
	F7call load_mant

	F7call lshrdi3

	clr CA

	F7call ashldi3

	pop r16

	wmov ZL, XL
	F7jmp store_mant

ENDF clr_mant_lsbs
#endif /* F7MOD_clr_mant_lsbs_ */


#ifdef F7MOD_normalize_with_carry_
;; Z = &f7_t
;; C[] = .mant, which may not be normalized
;; Carry === r16 = Addend to Z->expo in [-64, 128).
;; Normalize C[], set Flags, and adjust Z->expo.
;; Return CA (after normalization) in TMP.
;; Unchanged: T
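;;
;; A rough C model of the normalization (a sketch; rounding and the exact
;; Inf/NaN/zero encodings are handled below, helper names illustrative):
;;
;;   uint8_t shift = clz64 (mant);              // mant = CA.C0...C6
;;   if (shift == 64)                           // Mantissa is all-zero.
;;       { f7_clr (z);  return; }
;;   mant <<= shift;
;;   int16_t expo = z->expo + (int8_t) (carry - shift);
;;   // On int16_t overflow: +Inf resp. NaN; on underflow: zero.
;;   z->expo = expo;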
#define Addend r17
#define Zbits r26
#define expL r26
#define expH r27
DEFUN normalize_with_carry
	mov Addend, Carry
	tst C6
	brmi .Lshift.0
	;; r26 = CLZ (uint64_t R18)
	F7call clzdi2
	cpi Zbits, 64
	breq .Lclr
	sub Addend, Zbits
	mov r16, Zbits

	F7call ashldi3
	;; Assert (R25.7 == 1)
.Lshift.0:
	mov TMP, CA
	ld Flags, Z

	;; .expo += Addend
	ldd expL, Z+0+Expo
	ldd expH, Z+1+Expo
	;; Sign-extend Addend
	clr r16
	sbrc Addend, 7
	com r16

	;; exp += (int8_t) Addend, i.e. sign-extend Addend.
	add expL, Addend
	adc expH, r16
	brvc .Lnormal
	tst r16
	brmi .Lclr
	;; Overflow
#if F7_HAVE_Inf == 1
	ori Flags, F7_FLAG_inf
#else
	ldi Flags, F7_FLAG_nan
#endif /* Have Inf */
	ret

.Lnormal:
	std Z+0+Expo, expL
	std Z+1+Expo, expH
	ret

.Lclr:
	;; Underflow or Zero.
	clr TMP
	.global __clr_8
	XJMP __clr_8

LABEL normalize.store_with_flags
	;; no rounding
	set
	skipnext
LABEL normalize.round.store_with_flags
	;; with rounding
	clt ; skipped ?
LABEL normalize.maybe_round.store_with_flags
	F7call normalize_with_carry
	;; We have:
	;; Z = &f7_t
	;; X = .expo
	;; C[] = .mant
	;; R18 = .flags
	;; TMP = byte below .mant after normalization
	;; T = 1 => no rounding.
	brts .Lstore
	lsl TMP
	adc C0, ZERO
	brcc .Lstore
	adc C1, ZERO
	adc C2, ZERO
	adc C3, ZERO
	adc C4, ZERO
	adc C5, ZERO
	adc C6, ZERO
	brcc .Lstore
	;; We only come here if C6 overflowed, i.e. C[] is 0 now.
	;; .mant = 1.0 by restoring the MSbit.
	ror C6
	;; .expo += 1 and override the .expo stored during normalize.
	adiw expL, 1
	std Z+0+Expo, expL
	std Z+1+Expo, expH

.Lstore:
	F7call store_mant.with_flags

	;; Return the byte below .mant after normalization.
	;; This is only useful without rounding; the caller will know.
	mov R24, TMP
	ret
ENDF normalize_with_carry
#endif /* F7MOD_normalize_with_carry_ */


#ifdef F7MOD_normalize_
;; Using above functionality from C.
;; f7_t* normalize (f7_t *cc)
;; Adjusts cc->expo
;; Clears cc->flags
DEFUN normalize
	push r17
	push r16
	wmov ZL, r24
	F7call load_mant.clr_CA
	clr Carry
	st Z, ZERO
	F7call normalize.store_with_flags
	wmov r24, Z
	pop r16
	pop r17
	ret
ENDF normalize
#endif /* F7MOD_normalize_ */


#ifdef F7MOD_store_expo_
#define Done r24
#define expLO r24
#define expHI r25
;; expo == INT16_MAX => *Z = Inf, return Done = true.
;; expo == INT16_MIN => *Z = 0x0, return Done = true.
;; else => Z->expo = expo, return Done = false.
DEFUN store_expo
	cpi expHI, 0x80
	cpc expLO, ZERO
	breq .Ltiny
	adiw expLO, 1
	brvs .Lhuge
	sbiw expLO, 1
	std Z+0+Expo, expLO
	std Z+1+Expo, expHI
	ldi Done, 0
	ret

.Lhuge:
#if F7_HAVE_Inf == 1
	ld Done, Z
	andi Done, F7_FLAG_sign
	ori Done, F7_FLAG_inf
#else
	ldi Done, F7_FLAG_nan
#endif /* Have Inf */
	st Z, Done
	ldi Done, 1
	ret

.Ltiny:
	ldi Done, 1
	F7jmp clr
ENDF store_expo
#endif /* F7MOD_store_expo_ */


#ifdef F7MOD_set_u64_
DEFUN set_s64
	set
	skipnext
	;; ...
LABEL set_u64
	clt ; Skipped?
	wmov ZL, r16
	;; TMP holds .flags.
	clr TMP
	brtc .Lnot.negative

	bst C6, 7
	brtc .Lnot.negative
	bld TMP, F7_FLAGNO_sign
	.global __negdi2
	XCALL __negdi2

.Lnot.negative:
	st Z, TMP
	std Z+0+Expo, ZERO
	std Z+1+Expo, ZERO
	ldi Carry, 63
	F7call normalize.round.store_with_flags
	wmov r24, Z
	wmov r16, Z ; Unclobber r16.
	ret
ENDF set_s64
#endif /* F7MOD_set_u64_ */


#ifdef F7MOD_to_integer_
#define Mask r26
DEFUN to_integer
	wmov ZL, r24
	mov Mask, r22

	F7call load_mant.with_flags

	sbrc Flags, F7_FLAGNO_nan
	rjmp .Lset_0x8000

	sbrc Flags, F7_FLAGNO_inf
	rjmp .Lsaturate

	sbrs C6, 7
	rjmp .Lset_0x0000

	bst Flags, F7_FLAGNO_sign
	ldd r27, Z+0+Expo
	;; Does .expo have bits outside Mask? ...
	mov TMP, Mask
	com TMP
	and TMP, r27
	ldd r27, Z+1+Expo
	tst r27
	brmi .Lset_0x0000 ; ...yes: .expo is < 0 => return 0
	or TMP, r27
	brne .Lsaturate.T ; ...yes: .expo > Mask => saturate

	;; ...no: Shift right to meet .expo = 0.
	PUSH r16
	ldd r16, Z+0+Expo
	eor r16, Mask
	and r16, Mask
	clr CA
	F7call lshrdi3
	POP r16
	tst C6
	brmi .Lsaturate.T ; > INTxx_MAX => saturate

	rcall .Lround
	brmi .Lsaturate.T ; > INTxx_MAX => saturate

	brtc 9f ; >= 0 => return
	sbrc Mask, 5
	.global __negdi2
	XJMP __negdi2
	sbrc Mask, 4
	.global __negsi2
	XJMP __negsi2
	neg C6
	neg C5
	sbci C6, 0
9:	ret

.Lsaturate:
	bst Flags, F7_FLAGNO_sign
.Lsaturate.T:

#if F7_HAVE_Inf
	brtc .Lset_0x7fff
	;; -Inf => return 1 + INTxx_MIN
	mov ZL, Flags
	.global __clr_8
	XCALL __clr_8
	ldi C6, 0x80

	ldi CA+0, 0x01

	sbrs Mask, 5
	ldi CA+4, 0x01

	sbrs Mask, 4
	ldi CA+6, 0x01
	ret

.Lset_0x7fff:
	;; +Inf => return INTxx_MAX
	sec
	.global __sbc_8
	XCALL __sbc_8
	ldi C6, 0x7f
	ret
#endif /* F7_HAVE_Inf */

.Lset_0x8000:
	;; NaN => return INTxx_MIN
	.global __clr_8
	XCALL __clr_8
	ldi C6, 0x80
	ret

.Lset_0x0000:
	;; Small value => return 0x0
	.global __clr_8
	XJMP __clr_8

.Lround:
	;; C6.7 is known to be 0 here.
	;; Return N = 1 iff we have to saturate.
	cpi Mask, 0xf
	breq .Lround16
	cpi Mask, 0x1f
	breq .Lround32

	;; For now, no rounding in the 64-bit case. This rounding
	;; would have to be integrated into the right-shift.
	cln
	ret

.Lround32:
	rol C2
	adc C3, ZERO
	adc C4, ZERO
	rjmp 2f

.Lround16:
	rol C4
2:	adc C5, ZERO
	adc C6, ZERO
	ret
ENDF to_integer
#endif /* F7MOD_to_integer_ */


#ifdef F7MOD_to_unsigned_
#define Mask r26
DEFUN to_unsigned
	wmov ZL, r24
	mov Mask, r22

	F7call load_mant.with_flags

	sbrc Flags, F7_FLAGNO_nan
	rjmp .Lset_0xffff

	sbrc Flags, F7_FLAGNO_sign
	rjmp .Lset_0x0000

	sbrc Flags, F7_FLAGNO_inf
	rjmp .Lset_0xffff

	sbrs C6, 7
	rjmp .Lset_0x0000

	ldd r27, Z+0+Expo
	;; Does .expo have bits outside Mask? ...
	mov TMP, Mask
	com TMP
	and TMP, r27
	ldd r27, Z+1+Expo
	tst r27
	brmi .Lset_0x0000 ; ...yes: .expo is < 0 => return 0
	or TMP, r27
	brne .Lset_0xffff ; ...yes: .expo > Mask => saturate

	;; ...no: Shift right to meet .expo = 0.
	PUSH r16
	ldd r16, Z+0+Expo
	eor r16, Mask
	and r16, Mask
	clr CA
	F7call lshrdi3
	POP r16

	;; Rounding
	;; ??? C6.7 is known to be 0 here.
	cpi Mask, 0xf
	breq .Lround16
	cpi Mask, 0x1f
	breq .Lround32

	;; For now, no rounding in the 64-bit case. This rounding
	;; would have to be integrated into the right-shift.
	ret

.Lround32:
	rol C2
	adc C3, ZERO
	adc C4, ZERO
	rjmp 2f

.Lround16:
	rol C4
2:	adc C5, ZERO
	adc C6, ZERO
	brcs .Lset_0xffff ; Rounding overflow => saturate
	ret

.Lset_0xffff:
	;; return UINTxx_MAX
	sec
	.global __sbc_8
	XJMP __sbc_8

.Lset_0x0000:
	;; Small value => return 0x0
	.global __clr_8
	XJMP __clr_8

ENDF to_unsigned
#endif /* F7MOD_to_unsigned_ */


#ifdef F7MOD_addsub_mant_scaled_
;; int8_t f7_addsub_mant_scaled_asm (f7_t *r24, const f7_t *r22, const f7_t *r20,
;;                                   uint8_t r18);
;; R18.0 = 1 : ADD
;; R18.0 = 0 : SUB
;; R18[7..1] : Scale
;; Compute *R24 = *R22 + *R20 >> R18[7..1].
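;;
;; As a C sketch, with a, b, c denoting the 56-bit mantissas of *r22, *r20
;; and *r24 (plus the rounding byte CA) held in 64-bit containers:
;;
;;   uint64_t b_scaled = b >> (r18 >> 1);
;;   uint64_t c = (r18 & 1) ? a + b_scaled : a - b_scaled;
;;   // c is then normalized, rounded and stored to *r24.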

#define BA 10
#define B0 BA+1
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6

DEFUN addsub_mant_scaled
	do_prologue_saves 10

	bst r18, 0 ;; ADD ?
	lsr r18
	mov r16, r18

	wmov ZL, r20
	wmov YL, r22
	;; C[] = bb >> shift
	wmov XL, r24

	F7call load_mant.clr_CA
	F7call lshrdi3

	wmov BA, CA
	wmov B1, C1
	wmov B3, C3
	wmov B5, C5
	wmov ZL, YL
	F7call load_mant.clr_CA

	wmov ZL, XL

	brts .Ladd

	.global __subdi3
	XCALL __subdi3

	breq .Lzero
	brcc .Lround
	;; C = 1: Can underflow happen at all ?
.Lzero:
	F7call clr
	rjmp .Lepilogue

.Ladd:
	.global __adddi3
	XCALL __adddi3
	brcc .Lround
	ldi Carry, 1
	.global __lshrdi3
	XCALL __lshrdi3
	ori C6, 1 << 7
	skipnext
.Lround:
	clr Carry ; skipped?
	F7call normalize.round.store_with_flags

.Lepilogue:
	do_epilogue_restores 10

ENDF addsub_mant_scaled

#if !defined (__AVR_HAVE_MOVW__) || !defined (__AVR_HAVE_JMP_CALL__)
DEFUN lshrdi3
	.global __lshrdi3
	XJMP __lshrdi3
ENDF lshrdi3
DEFUN ashldi3
	.global __ashldi3
	XJMP __ashldi3
ENDF ashldi3
#else

# Basically just a wrapper around libgcc's __lshrdi3.
DEFUN lshrdi3
	;; Handle bit 5 of shift offset.
	sbrs r16, 5
	rjmp 4f
	wmov CA, C3
	wmov C1, C5
	clr C6 $ clr C5 $ wmov C3, C5
4:
	;; Handle bit 4 of shift offset.
	sbrs r16, 4
	rjmp 3f
	wmov CA, C1
	wmov C1, C3
	wmov C3, C5
	clr C6 $ clr C5
3:
	;; Handle bits 3...0 of shift offset.
	push r16
	andi r16, 0xf
	breq 0f

	.global __lshrdi3
	XCALL __lshrdi3
0:
	pop r16
	ret
ENDF lshrdi3

# Basically just a wrapper around libgcc's __ashldi3.
DEFUN ashldi3
	;; Handle bit 5 of shift offset.
	sbrs r16, 5
	rjmp 4f
	wmov C5, C1
	wmov C3, CA
	clr C2 $ clr C1 $ wmov CA, C1
4:
	;; Handle bit 4 of shift offset.
	sbrs r16, 4
	rjmp 3f
	wmov C5, C3
	wmov C3, C1
	wmov C1, CA
	clr CA $ clr C0
3:
	;; Handle bits 3...0 of shift offset.
	push r16
	andi r16, 0xf
	breq 0f

	.global __ashldi3
	XCALL __ashldi3
0:
	pop r16
	ret
ENDF ashldi3
#endif /* Small device */

#endif /* F7MOD_addsub_mant_scaled_ */

#if defined F7MOD_mul_mant_ && defined (__AVR_HAVE_MUL__)
	#define A0 11
	#define A1 A0+1
	#define A2 A0+2
	#define A3 A0+3
	#define A4 A0+4
	#define A5 A0+5
	#define A6 A0+6

	#define TT0 26
	#define TT1 TT0+1
	#define TT2 28
	#define TT3 TT2+1

	#define BB 10

;; R18.0 = 1: No rounding.

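;; What follows is schoolbook multiplication of the two 7-byte mantissas,
;; keeping the upper 8 bytes CA...C6 of the 14-byte product.  Partial
;; products that only affect bytes below CA are skipped, hence the low
;; byte CA is only approximate.  As a host-side C sketch:
;;
;;   unsigned __int128 p = (unsigned __int128) a56 * b56;  // a56, b56 < 2^56
;;   uint64_t c = (uint64_t) (p >> 48);                    // c = CA...C6
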
DEFUN mul_mant
	do_prologue_saves 10
	bst r18, 0
	push r25
	push r24
	movw ZL, r22
	LDD A0, Z+0+Off
	LDD A1, Z+1+Off
	LDD A2, Z+2+Off
	LDD A3, Z+3+Off
	LDD A4, Z+4+Off
	LDD A5, Z+5+Off
	LDD A6, Z+6+Off
	movw ZL, r20

	;; 6 * 6 -> 6:5
	;; 4 * 6 -> 4:3
	;; 2 * 6 -> 2:1
	;; 0 * 6 -> 0:a
	ldd BB, Z+6+Off
	mul A6, BB $ movw C5, r0
	mul A4, BB $ movw C3, r0
	mul A2, BB $ movw C1, r0
	mul A0, BB $ movw CA, r0

	;; 5 * 6 -> 5:4
	;; 3 * 6 -> 3:2
	;; 1 * 6 -> 1:0
	mul A5, BB $ movw TT2, r0
	mul A3, BB $ movw TT0, r0
	mul A1, BB
	ADD C0, r0 $ adc C1, r1
	adc C2, TT0 $ adc C3, TT1
	adc C4, TT2 $ adc C5, TT3 $ clr ZERO
	adc C6, ZERO
	;; Done B6

	;; 3 * 3 -> 0:a
	;; 4 * 4 -> 2:1
	;; 5 * 5 -> 4:3
	ldd BB, Z+3+Off $ mul A3, BB $ movw TT0, r0
	ldd BB, Z+4+Off $ mul A4, BB $ movw TT2, r0
	ldd BB, Z+5+Off $ mul A5, BB

	ADD CA, TT0 $ adc C0, TT1
	adc C1, TT2 $ adc C2, TT3
	adc C3, r0 $ adc C4, r1
	brcc .+2
	adiw C5, 1

	;; 6 * 5 -> 5:4
	;; 4 * 5 -> 3:2
	;; 2 * 5 -> 1:0
	;; 0 * 5 -> a:-
	mul A0, BB
	;; A0 done
#define Atmp A0

	mov Atmp, r1
	mul A6, BB $ movw TT2, r0
	mul A4, BB $ movw TT0, r0
	mul A2, BB

	ADD CA, Atmp
	adc C0, r0 $ adc C1, r1
	adc C2, TT0 $ adc C3, TT1
	adc C4, TT2 $ adc C5, TT3 $ clr ZERO
	adc C6, ZERO

	;; 1 * 5 -> 0:a
	;; 3 * 5 -> 2:1
	;; 6 * 4 -> 4:3
	mul A1, BB $ movw TT0, r0
	mul A3, BB $ movw TT2, r0
	ldd BB, Z+4+Off
	mul A6, BB

	ADD CA, TT0 $ adc C0, TT1
	adc C1, TT2 $ adc C2, TT3
	adc C3, r0 $ adc C4, r1 $ clr ZERO
	adc C5, ZERO $ adc C6, ZERO
	;; B5 done

	;; 6 * 3 -> 3:2
	;; 6 * 1 -> 1:0
	;; 4 * 1 -> a:-
	mov TT0, A6 $ ldd TMP, Z+3+Off
	mov BB, A4 $ ldd Atmp, Z+1+Off
	rcall .Lmul.help.3

	;; 5 * 4 -> 3:2
	;; 5 * 2 -> 1:0
	;; 3 * 2 -> a:-
	mov TT0, A5 $ ldd TMP, Z+4+Off
	mov BB, A3 $ ldd Atmp, Z+2+Off
	rcall .Lmul.help.3

	;; 4 * . -> 3:2 (=0)
	;; 4 * 3 -> 1:0
	;; 2 * 3 -> a:-
	mov TT0, A4 $ clr TMP
	mov BB, A2 $ ldd Atmp, Z+3+Off
	rcall .Lmul.help.3

	;; 3 * . -> 3:2 (=0)
	;; 3 * 4 -> 1:0
	;; 1 * 4 -> a:-
	mov TT0, A3 $ clr TMP
	mov BB, A1 $ ldd Atmp, Z+4+Off
	rcall .Lmul.help.3

	;; . * ? -> 3:2 (=0)
	;; . * 0 -> 1:0 (=0)
	;; 5 * 0 -> a:-
	clr TT0
	mov BB, A5 $ ldd Atmp, Z+0+Off
	rcall .Lmul.help.3

	clr TT3 ;; Asserted by .Lmul.help.2
	;; 6 * 2 -> 2:1
	;; 6 * 0 -> 0:a
	$ ldd TMP, Z+2+Off
	mov BB, A6 ;$ ldd Atmp, Z+0+Off
	rcall .Lmul.help.2

	;; 5 * 3 -> 2:1
	;; 5 * 1 -> 0:a
	$ ldd TMP, Z+3+Off
	mov BB, A5 $ ldd Atmp, Z+1+Off
	rcall .Lmul.help.2

	;; 4 * . -> 2:1 (=0)
	;; 4 * 2 -> 0:a
	$ clr TMP
	mov BB, A4 $ ldd Atmp, Z+2+Off
	rcall .Lmul.help.2

	;; 2 * . -> 2:1 (=0)
	;; 2 * 4 -> 0:a
	$ clr TMP
	mov BB, A2 $ ldd Atmp, Z+4+Off
	rcall .Lmul.help.2

	;; Finally...

	pop ZL
	pop ZH
	;; The high byte is at least 0x40 and at most 0xfe.
	;; The result has to be left-shifted by one in order to scale it
	;; correctly.

	ldi Carry, 1
	F7call normalize.maybe_round.store_with_flags

	do_epilogue_restores 10

;; TT0 * Tmp -> 3:2
;; TT0 * Atmp -> 1:0
;; BB * Atmp -> a:-
;;
;; Clobbers : TMP, TT0...TT3.
;; Sets : ZERO = 0.
.Lmul.help.3:
	mul TT0, TMP $ movw TT2, r0
	mul TT0, Atmp $ movw TT0, r0
	mul BB, Atmp

	ADD CA, r1
	adc C0, TT0 $ adc C1, TT1
	adc C2, TT2
.Lmul.help.3.C3: $ adc C3, TT3 $ clr ZERO
	adc C4, ZERO $ adc C5, ZERO
	adc C6, ZERO
	ret

;; BB * TMP -> 2:1
;; BB * Atmp -> 0:a
;;
;; Asserts : TT3 = 0
;; Clobbers : TMP, TT0, TT1.
;; Sets : ZERO = 0.
.Lmul.help.2:
	mul BB, TMP $ movw TT0, r0
	mul BB, Atmp
	ADD CA, r0 $ adc C0, r1
	adc C1, TT0 $ adc C2, TT1
	rjmp .Lmul.help.3.C3

ENDF mul_mant
#endif /* F7MOD_mul_mant_ && MUL */


#if defined (F7MOD_div_)

;; Dividend is C[]

;; Divisor
#define A0 9
#define A1 10
#define A2 11
#define A3 12
#define A4 13
#define A5 14
#define A6 15

;; Quotient
#define Q0 0 /* === TMP */
#define Q1 Q0+1 /* === ZERO */
#define Q2 26
#define Q3 Q2+1
#define Q4 28
#define Q5 Q4+1
#define Q6 16
#define Q7 Q6+1

#define Cnt CA
#define QBits r8

DEFUN div
	do_prologue_saves 12

	;; Number of bits requested for the quotient.
	;; This is usually 2 + F7_MANT_BITS.
	mov QBits, r20
	wmov ZL, r22
	LDD A0, Z+0+Off
	LDD A1, Z+1+Off
	LDD A2, Z+2+Off
	LDD A3, Z+3+Off
	LDD A4, Z+4+Off
	LDD A5, Z+5+Off
	LDD A6, Z+6+Off
	wmov ZL, r24
	F7call load_mant

	;; Clear quotient Q[].
	clr Q0 ; === TMP
	;clr Q1 ; === ZERO
	wmov Q2, Q0
	wmov Q4, Q0
	wmov Q6, Q0

	;; C[] and A[] are valid mantissae, i.e. their MSBit is set. Therefore,
	;; quotient Q[] will be in [0x0.ff..., 0x0.40...] and to adjust Q[] we
	;; need at most 1 left-shift. Compute F7_MANT_BITS + 2 bits of the
	;; quotient: One bit is used for rounding, and one bit might be consumed
	;; by the mentioned left-shift.
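	;;
	;; As a C sketch, the loop below is plain restoring division
	;; (c = dividend, a = divisor, both 56-bit mantissas in 64-bit
	;; containers):
	;;
	;;   uint64_t q = 0;
	;;   for (uint8_t i = 0; i < qbits; ++i)
	;;   {
	;;       if (i) c <<= 1;                  // Bring down the next bit.
	;;       if (c >= a) { c -= a;  q = (q << 1) | 1; }
	;;       else        {          q =  q << 1;      }
	;;   }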
	mov Cnt, QBits
	rjmp .Loop_start

.Loop:
	;; Shift dividend.
	LSL C0
	rol C1
	rol C2
	rol C3
	rol C4
	rol C5
	rol C6
	brcs .Lfits
	;; Compare dividend against divisor.
.Loop_start:
	CP C0, A0
	cpc C1, A1
	cpc C2, A2
	cpc C3, A3
	cpc C4, A4
	cpc C5, A5
	cpc C6, A6
	;; Shift 0 into quotient.
	brlo 1f
.Lfits:
	;; Divisor fits into dividend.
	SUB C0, A0
	sbc C1, A1
	sbc C2, A2
	sbc C3, A3
	sbc C4, A4
	sbc C5, A5
	sbc C6, A6
	;; Shift 1 into quotient.
	sec
	rol Q0
	skipnext
1:	lsl Q0
	rol Q1
	rol Q2
	rol Q3
	rol Q4
	rol Q5
	rol Q6
	rol Q7
	dec Cnt
	brne .Loop

	wmov CA, Q0
	wmov C1, Q2
	wmov C3, Q4
	wmov C5, Q6
	clr ZERO

	ldi Carry, 64
	sub Carry, QBits
	F7call normalize.round.store_with_flags

	do_epilogue_restores 12
ENDF div

#endif /* F7MOD_div_ */


#if defined (F7MOD_sqrt16_) && defined (__AVR_HAVE_MUL__)

#define Mask C6
#define Q0 C3 /* = R22 */
#define Q1 C4 /* = R23 */

;; uint16_t R24 = sqrt16_XXX (uint16_t R24);
;; Clobbers: R22, R23, TMP.
;;
;; XXX = floor: Return integral part of square-root of R25:R24 with R25 = 0.
;; Error is in [0, -1 LSB).
;; XXX = round: Return square-root of R25:R24 rounded to nearest integer.
;; R25 = (Q[] >= 65281) = (Q > 0xff00), i.e. if Q[] is not
;; bigger than 0xff00, then the result fits in 8 bits.
;; Return C = 0 if the result is the same as for XXX = floor,
;; error in [0, -1/2 LSB)
;; Return C = 1 if the result is one higher than for XXX = floor,
;; error in [1/2 LSB, 0).
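;;
;; The floor part in C, as a sketch:
;;
;;   uint8_t isqrt16_floor (uint16_t q)
;;   {
;;       uint8_t r = 0;
;;       for (uint8_t mask = 0x80; mask; mask >>= 1)
;;           if ((uint32_t) (r + mask) * (r + mask) <= q)
;;               r += mask;
;;       return r;
;;   }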
DEFUN sqrt16_round
	set
	skipnext
	;; ...
LABEL sqrt16_floor
	clt ; Skipped?
	movw Q0, r24
	clr C5
	ldi Mask, 1 << 7

.Loop_mask:
	add C5, Mask
	mul C5, C5
	cp Q0, R0
	cpc Q1, R1
	brsh 1f
	sub C5, Mask
1:	lsr Mask
	brne .Loop_mask

	brtc .Ldone ; No rounding => C6 will be 0.

	;; Rounding: (X + 1/2)^2 = X^2 + X + 1/4, thus probing
	;; for bit -1 is testing Q[] against C5^2 + C5.
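	;; Since Q[] and C5^2 + C5 are integers, rounding up is the same as
	;; Q[] > C5^2 + C5; as a C sketch:
	;;   round_up = (uint32_t) c5 * c5 + c5 < q;   // c5 = isqrt16_floor (q)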
	mul C5, C5
	add R0, C5
	adc R1, C6 ; Exploit C6 === Mask = 0.
	cp R0, Q0
	cpc R1, Q1
	brcc .Ldone
	;; If C5^2 + C5 + 1/4 fits into Q[], then round up and C = 1.
	adiw C5, 1 ; Exploit C6 === Mask = 0.
	sec

.Ldone:
	clr __zero_reg__
	ret
ENDF sqrt16_round
#undef Mask
#undef Q0
#undef Q1
#endif /* F7MOD_sqrt16_ && MUL */

#ifdef F7MOD_sqrt_approx_
DEFUN sqrt_approx
	push r17
	push r16
	wmov XL, r24
	wmov ZL, r22

	;; C[] = 0.
	.global __clr_8
	XCALL __clr_8

	ldd C5, Z+5+Off
	ldd C6, Z+6+Off

	ldd Carry, Z+0+Expo
	ldd TMP, Z+1+Expo
	wmov ZL, XL

	st Z, ZERO

	asr TMP
	ror Carry
	std Z+1+Expo, TMP
	std Z+0+Expo, Carry

	;; Re-interpreting our Q-format 1.xx mantissa as Q2.yy, we have to shift
	;; the mantissa to the right by 1. As we need an even exponent, multiply
	;; the mantissa by 2 for odd exponents, i.e. only right-shift if .expo
	;; is even.
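	;; This relies on sqrt (m * 2^e) = sqrt (m) * 2^(e/2) for even e and
	;; sqrt (m * 2^e) = sqrt (2 * m) * 2^((e - 1) / 2) for odd e; the
	;; asr/ror above computed floor (e / 2), which is (e - 1) / 2 for odd e.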

	brcs 1f
	lsr C6
	ror C5

1:
	F7call sqrt16_round

	;; sqrt16_round() returns: C = 0: error in [0, -1/2 LSB).
	;; C = 1: error in [1/2 LSB, 0)

	brcc 2f
	;; Undo the round-up from sqrt16_round(); this will transform to
	;; error in [-1/2 LSB, -1 LSB).
	sbiw C5, 1
	;; Together with the correct bit C4.7, the error is in [0, -1/2 LSB).
	ori C4, 1 << 7

2:	;; Setting C4.6 adds 1/4 LSB and the error is now in [1/4 LSB, -1/4 LSB)
	;; in either case.
	ori C4, 1 << 6

	;; ????????????
	;; sqrt16_round() runs on integers which means that it computes the
	;; square root of mant * 2^14 if we regard mant as Q-format 2.yy,
	;; i.e. 2 integral bits. The result is sqrt(mant) * 2^7,
	;; and in order to get the same scaling like the input, .expo has to
	;; be adjusted by 7. ???????????????

	ldi Carry, 8
	F7call normalize.store_with_flags

	pop r16
	pop r17
	ret

ENDF sqrt_approx
#endif /* F7MOD_sqrt_approx_ */


#undef CA
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef Carry


#ifdef F7MOD_D_fabs_
_DEFUN __fabs
	DALIAS fabs
	LALIAS fabsl
	andi R25, 0b01111111
	ret
_ENDF __fabs
#endif /* F7MOD_D_fabs_ */


#ifdef F7MOD_D_neg_
_DEFUN __neg
_LABEL __negdf2
	subi R25, 0b10000000
	ret
_ENDF __neg
#endif /* F7MOD_D_neg_ */


#ifdef F7MOD_D_signbit_
_DEFUN __signbit
	DALIAS signbit
	LALIAS signbitl
	bst R25, 7
	clr R25
	clr R24
	bld R24, 0
	ret
_ENDF __signbit
#endif /* F7MOD_D_signbit_ */


#ifdef F7MOD_D_copysign_
_DEFUN __copysign
	DALIAS copysign
	LALIAS copysignl
	bst R17, 7
	bld R25, 7
	ret
_ENDF __copysign
#endif /* F7MOD_D_copysign_ */


#ifdef F7MOD_D_isinf_
_DEFUN __isinf
	DALIAS isinf
	LALIAS isinfl
	F7call class_D
	;; Inf: T = Z = 1.
	brtc 0f
	ldi R24, 1
	breq 1f
0:
	clr R24
1:
	clr R25
	ret
_ENDF __isinf
#endif /* F7MOD_D_isinf_ */


#ifdef F7MOD_D_isnan_
_DEFUN __isnan
	DALIAS isnan
	LALIAS isnanl
	F7call class_D
	;; NaN: T = 1, Z = 0.
	brtc 0f
	ldi R24, 1
	brne 1f
0:
	clr R24
1:
	clr R25
	ret
_ENDF __isnan
#endif /* F7MOD_D_isnan_ */


#ifdef F7MOD_D_isfinite_
_DEFUN __isfinite
	DALIAS isfinite
	LALIAS isfinitel
	F7call class_D
	;; Number <=> T = 0.
	bld R24, 0
	com R24
	andi R24, 1
	clr R25
	ret
_ENDF __isfinite
#endif /* F7MOD_D_isfinite_ */


#ifdef F7MOD_D_class_
;; The encoded exponent has 11 Bits.
#define MAX_BIASED_EXPO 0b0111111111110000

;; Classify a double in R18[]
;; Number: T-Flag = 0.
;; +-Inf : T-Flag = 1, Z-Flag = 1.
;; NaN : T-Flag = 1, Z-Flag = 0.
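;;
;; In C, the test below amounts to (a sketch; x = the raw 64-bit encoding):
;;
;;   uint16_t hi = x >> 48;
;;   if ((hi & 0x7ff0) != 0x7ff0)             =>  number  (T = 0)
;;   else if ((x & 0xfffffffffffffull) == 0)  =>  +-Inf   (T = 1, Z = 1)
;;   else                                     =>  NaN     (T = 1, Z = 0)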
DEFUN class_D
	wmov R26, R24
	andi R26, lo8 (MAX_BIASED_EXPO)
	andi R27, hi8 (MAX_BIASED_EXPO)
	subi R26, lo8 (MAX_BIASED_EXPO)
	sbci R27, hi8 (MAX_BIASED_EXPO)
	clt
	brne .L.number
	set
	;; Set sign and expo to 0.
	clr R25
	andi R24, lo8 (~MAX_BIASED_EXPO)
	;; What remains is the mantissa.
	;; Mantissa == 0 => +/-Inf.
	;; Mantissa != 0 => NaN.
	;; Compare R18[] against sign_extend(R26) with R26 = 0.
	.global __cmpdi2_s8
	XJMP __cmpdi2_s8
.L.number:
	ret

ENDF class_D
#endif /* F7MOD_D_class_ */


#ifdef F7MOD_call_dd_

;; Provide double wrappers for functions that operate on f7_t and get f7_t*.
;;
;; We set up a frame of sizeof(f7_t), convert the input double in R18[] to
;; f7_t in that frame location, then call *Z and finally convert the result f7_t
;; to double R18[] if that's requested.
;;
;; call_dd: double func (double A)
;; void (*Z) (f7_t *aa, const f7_t *aa)
;;
;; call_dx: double func (type_t A) , sizeof(type_t) <= 4
;; void (*Z) (f7_t *aa, type_t)
;;
;; call_xd: type_t func (double A)
;; type_t (*Z) (const f7_t *aa)
;;
;; call_ddx: double func (double A, word_t) , sizeof (word_t) <= 2
;; void (*Z) (f7_t *aa, const f7_t *aa, word_t)

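;; For example, the call_dd flavor behaves like the following C, where
;; set_double / get_double stand for the conversion helpers invoked below
;; (a sketch only; names illustrative):
;;
;;   double call_dd (double a, void (*z) (f7_t*, const f7_t*))
;;   {
;;       f7_t aa;
;;       set_double (&aa, a);
;;       z (&aa, &aa);
;;       return get_double (&aa);
;;   }
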
#define WHAT R13

DEFUN call_dd ; WHAT = R13 = 3
	inc ZERO
LABEL call_xd ; WHAT = R13 = 2
	inc ZERO
LABEL call_ddx ; WHAT = R13 = 1
	inc ZERO
LABEL call_dx ; WHAT = R13 = 0
	push WHAT
	mov WHAT, ZERO
	clr ZERO
	;; R14/R15 hold Z, the address of the f7_worker function, until we need it.
	push r14
	push r15
	wmov r14, Z

#define n_pushed 4
#define n_frame 10

	do_prologue_saves n_pushed, n_frame
	;; Y = FramePointer + 1
	adiw Y, 1
	dec WHAT
	brmi .Ldx ; WHAT was initially 0.
	;; FP + 1 = (f7_t) arg1
	wmov r16, Y
	;; The double argument is in R18[].
	XCALL F7_NAME (set_double_impl)
	tst WHAT
	brne .Lno.ddx ; WHAT was initially != 1.
	;; call_ddx: Set R20/21 to the 2-byte scalar / pointer argument.
	;; Fetch it from where prologue_saves put it.
	ldd r20, Y + n_frame + 3 ; Saved R16
	ldd r21, Y + n_frame + 2 ; Saved R17
.Lno.ddx:
	wmov r22, Y ; &arg1 (input)
.Ldo.dx:
	wmov r24, Y ; &arg1 (output)
	wmov Z, r14
	XICALL
	dec WHAT
	breq .Lepilogue ; WHAT was initially 2: Return non-double.
	wmov r24, Y ; &arg1
	XCALL F7_NAME (get_double)
.Lepilogue:
	;; + 3 to account for R13...R15 pushed prior to do_prologue_saves.
	do_epilogue_restores n_pushed + 3, n_frame

.Ldx:
	;; call_dx: Copy the 4-byte input scalar from R22[4] to R20[4].
	wmov r20, r22
	wmov r22, r24
	rjmp .Ldo.dx

ENDF call_dd
#endif /* F7MOD_call_dd_ */


#ifdef F7MOD_call_ddd_

;; Provide double wrappers for functions that operate on f7_t and get f7_t*.
;;
;; We set up a frame of 2 * sizeof(f7_t), convert the input doubles in R18[]
;; and R10[] to f7_t in these frame locations, then call *Z and finally
;; convert the result f7_t to double R18[] if that's requested.
;;
;; call_ddd: double func (double A, double B)
;; void (*Z) (f7_t *aa, const f7_t *aa, const f7_t *bb)
;;
;; call_xdd: type_t func (double A, double B)
;; type_t (*Z) (const f7_t *aa, const f7_t *bb)

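;; In C terms (a sketch only; conversion helper names illustrative):
;;
;;   double call_ddd (double a, double b,
;;                    void (*z) (f7_t*, const f7_t*, const f7_t*))
;;   {
;;       f7_t aa, bb;
;;       set_double (&aa, a);
;;       set_double (&bb, b);
;;       z (&aa, &aa, &bb);
;;       return get_double (&aa);
;;   }
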
DEFUN call_ddd
	inc ZERO
LABEL call_xdd
	;; R8/R9 hold Z, the address of the f7_worker function, until we need it.
	push r9
	push r8
	wmov r8, Z
	;; This is an argument to call.2 and will be accessed by the arg pointer.
	push ZERO
	clr ZERO
	rcall call.2
	pop TMP
	pop r8
	pop r9
	ret

#define n_pushed 4
#define n_frame 20

call.2:
	do_prologue_saves n_pushed, n_frame
	;; Y = FramePointer + 1
	adiw Y, 1
	;; FP + 1 = (f7_t) arg1
	wmov r16, Y
	;; First double argument is already in R18[].
	XCALL F7_NAME (set_double_impl)
	;; FP + 11 = (f7_t) arg2
	wmov r16, Y
	subi r16, lo8 (-10)
	sbci r17, hi8 (-10)
	;; Move second double argument to R18[].
	wmov r18, r10
	wmov r20, r12
	wmov r22, r14
	;; Get high word of arg2 from where prologue_saves put it.
	ldd r24, Y + n_frame + 3 ; Saved R16
	ldd r25, Y + n_frame + 2 ; Saved R17
	XCALL F7_NAME (set_double_impl)
	;; Z (f7_t *arg1, const f7_t *arg1, const f7_t *arg2)
	wmov Z, r8
	wmov r24, Y ; &arg1
	;; WHAT == 0 => call_xdd
	;; WHAT != 0 => call_ddd
	ldd TMP, Y + n_frame + n_pushed + PC_SIZE
	tst TMP
	breq .Lxdd
	wmov r22, Y ; &arg1
	wmov r20, r16 ; &arg2
	XICALL
	wmov r24, Y ; &arg1
	XCALL F7_NAME (get_double)
.Lepilogue:
	do_epilogue_restores n_pushed, n_frame
.Lxdd:
	wmov r22, r16 ; &arg2
	XICALL
	rjmp .Lepilogue
ENDF call_ddd
#endif /* F7MOD_call_ddd_ */

#include "f7-wraps.h"

#endif /* !AVR_TINY */