;; libgcc/config/avr/lib1funcs-fixed.S
/* -*- Mode: Asm -*- */
;; Copyright (C) 2012-2023 Free Software Foundation, Inc.
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
;;                Georg-Johann Lay (avr@gjlay.de)

;; This file is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.

;; In addition to the permissions in the GNU General Public License, the
;; Free Software Foundation gives you unlimited permission to link the
;; compiled version of this file into combinations with other programs,
;; and to distribute those combinations without any restriction coming
;; from the use of this file.  (The General Public License restrictions
;; do apply in other respects; for example, they cover modification of
;; the file, and distribution when not linked into a combine
;; executable.)

;; This file is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; On avrtiny, r0/r1 do not exist; the ABI uses r16 as tmp and r17 as
;; the zero register instead.
#if defined __AVR_TINY__
#define __zero_reg__ r17
#define __tmp_reg__  r16
#else
#define __zero_reg__ r1
#define __tmp_reg__  r0
#endif
.section .text.libgcc.fixed, "ax", @progbits

;; The routines below are not available on avrtiny
;; (matching #endif is at the end of the file).
#ifndef __AVR_TINY__

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (L_fractqqsf)
;; QQ (s.7 in R24) -> SF (R25:R22), via the SA -> SF conversion.
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Sign-extend
    lsl     r24
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fractqqsf
#endif  /* L_fractqqsf */

#if defined (L_fractuqqsf)
;; UQQ (.8 in R24) -> SF (R25:R22), via the USA -> SF conversion.
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuqqsf
#endif  /* L_fractuqqsf */

#if defined (L_fracthqsf)
;; HQ (s.15 in R25:R24) -> SF (R25:R22), via the SA -> SF conversion.
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov    22, 24
    ;; Sign-extend
    lsl     r25
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fracthqsf
#endif  /* L_fracthqsf */

#if defined (L_fractuhqsf)
;; UHQ (.16 in R25:R24) -> SF (R25:R22), via the USA -> SF conversion.
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov    22, 24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhqsf
#endif  /* L_fractuhqsf */

#if defined (L_fracthasf)
;; HA (s8.7 in R25:R24) -> SF (R25:R22), via the SA -> SF conversion.
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Sign-extend
    lsl     r25
    sbc     r25, r25
    XJMP    __fractsasf
ENDF __fracthasf
#endif  /* L_fracthasf */

#if defined (L_fractuhasf)
;; UHA (8.8 in R25:R24) -> SF (R25:R22), via the USA -> SF conversion.
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Zero-extend
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhasf
#endif  /* L_fractuhasf */


#if defined (L_fractsqsf)
;; SQ (s.31 in R25:R22) -> SF (R25:R22).
DEFUN __fractsqsf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (31)
    sbci    r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif  /* L_fractsqsf */

#if defined (L_fractusqsf)
;; USQ (.32 in R25:R22) -> SF (R25:R22).
DEFUN __fractusqsf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif  /* L_fractusqsf */

#if defined (L_fractsasf)
;; SA (s16.15 in R25:R22) -> SF (R25:R22).
DEFUN __fractsasf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (15)
    sbci    r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif  /* L_fractsasf */

#if defined (L_fractusasf)
;; USA (16.16 in R25:R22) -> SF (R25:R22).
DEFUN __fractusasf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif  /* L_fractusasf */
171
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The conversions below scale the float by a power of two (added to the
;; exponent byte(s) via subi/sbci with exp_lo/exp_hi) and then use the
;; float -> integer conversions to produce the fixed-point bit pattern.

#if defined (L_fractsfqq)
;; SF (R25:R22) -> QQ (s.7 in R24).
DEFUN __fractsfqq
    ;; Multiply with 2^{24+7} to get a QQ result in r25
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XCALL   __fixsfsi
    mov     r24, r25
    ret
ENDF __fractsfqq
#endif  /* L_fractsfqq */

#if defined (L_fractsfuqq)
;; SF (R25:R22) -> UQQ (.8 in R24).
DEFUN __fractsfuqq
    ;; Multiply with 2^{24+8} to get a UQQ result in r25
    subi    r25, exp_hi (-32)
    XCALL   __fixunssfsi
    mov     r24, r25
    ret
ENDF __fractsfuqq
#endif  /* L_fractsfuqq */

#if defined (L_fractsfha)
;; SF (R25:R22) -> HA (s8.7 in R25:R24).
DEFUN __fractsfha
    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
    subi    r24, exp_lo (-23)
    sbci    r25, exp_hi (-23)
    XJMP    __fixsfsi
ENDF __fractsfha
#endif  /* L_fractsfha */

#if defined (L_fractsfuha)
;; SF (R25:R22) -> UHA (8.8 in R25:R24).
DEFUN __fractsfuha
    ;; Multiply with 2^24 to get a UHA result in r25:r24
    subi    r25, exp_hi (-24)
    XJMP    __fixunssfsi
ENDF __fractsfuha
#endif  /* L_fractsfuha */

#if defined (L_fractsfhq)
;; SF -> HQ; the same scaling also implements SF -> SQ.
FALIAS __fractsfsq

DEFUN __fractsfhq
    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
    ;; resp. with 2^31 to get a SQ result in r25:r22
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XJMP    __fixsfsi
ENDF __fractsfhq
#endif  /* L_fractsfhq */

#if defined (L_fractsfuhq)
;; SF -> UHQ; the same scaling also implements SF -> USQ.
FALIAS __fractsfusq

DEFUN __fractsfuhq
    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
    ;; resp. with 2^32 to get a USQ result in r25:r22
    subi    r25, exp_hi (-32)
    XJMP    __fixunssfsi
ENDF __fractsfuhq
#endif  /* L_fractsfuhq */

#if defined (L_fractsfsa)
;; SF (R25:R22) -> SA (s16.15 in R25:R22).
DEFUN __fractsfsa
    ;; Multiply with 2^15 to get a SA result in r25:r22
    subi    r24, exp_lo (-15)
    sbci    r25, exp_hi (-15)
    XJMP    __fixsfsi
ENDF __fractsfsa
#endif  /* L_fractsfsa */

#if defined (L_fractsfusa)
;; SF (R25:R22) -> USA (16.16 in R25:R22).
DEFUN __fractsfusa
    ;; Multiply with 2^16 to get a USA result in r25:r22
    subi    r25, exp_hi (-16)
    XJMP    __fixunssfsi
ENDF __fractsfusa
#endif  /* L_fractsfusa */
;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.  */

/*******************************************************
    Fractional Multiplication  8 x 8  without MUL
*******************************************************/

#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding:  ???
DEFUN __mulqq3
    XCALL   __fmuls
    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
    ;; The only input that can produce  -1  is  (-1)^2.
    dec     r23
    brvs    0f
    inc     r23
0:  ret
ENDF __mulqq3
#endif /* L_mulqq3 && ! HAVE_MUL */
/*******************************************************
    Fractional Multiply  .16 x .16  with and without MUL
*******************************************************/

#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
DEFUN __mulhq3
    XCALL   __mulhisi3
    ;; Shift result into place
    lsl     r23
    rol     r24
    rol     r25
    brvs    1f
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
    ldi     r24, lo8 (0x7fff)
    ldi     r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif /* defined (L_mulhq3) */

#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB < error <= 0.5 LSB
DEFUN __muluhq3
    XCALL   __umulhisi3
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
ENDF __muluhq3
#endif  /* L_muluhq3 */
/*******************************************************
    Fixed  Multiply  8.8 x 8.8  with and without MUL
*******************************************************/

#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
    XCALL   __mulhisi3
    ;; Adjust decimal point: one sign bit is consumed by the shift.
    lsl     r22
    rol     r23
    rol     r24
    XJMP    __muluha3_round
ENDF __mulha3
#endif  /* L_mulha3 */

#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB < error <= 0.5 LSB
DEFUN __muluha3
    XCALL   __umulhisi3
    XJMP    __muluha3_round
ENDF __muluha3
#endif  /* L_muluha3 */

#if defined (L_muluha3_round)
;;; Common tail: place the middle 16 bits of the 32-bit product in
;;; R25:R24 and round according to the bit below the LSB.
DEFUN __muluha3_round
    ;; Shift result into place
    mov     r25, r24
    mov     r24, r23
    ;; Round
    sbrc    r22, 7
    adiw    r24, 1
    ret
ENDF __muluha3_round
#endif  /* L_muluha3_round */
/*******************************************************
    Fixed  Multiplication  16.16 x 16.16
*******************************************************/

;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__

#if defined (__AVR_HAVE_MUL__)

;; Multiplier
#define A0  16
#define A1  A0+1
#define A2  A1+1
#define A3  A2+1

;; Multiplicand
#define B0  20
#define B1  B0+1
#define B2  B1+1
#define B3  B2+1

;; Result
#define C0  24
#define C1  C0+1
#define C2  C1+1
#define C3  C2+1

#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0:  -1.0 LSB < error <= 0   LSB
;;; Rounding, T = 1:  -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.

    clr C0
    clr C1
    mul A0, B0  $  movw C2, r0

    mul A1, B0  $  add C3, r0  $  adc C0, r1
    mul A0, B1  $  add C3, r0  $  adc C0, r1  $  rol C1

    ;; Round if T = 1.  Store guarding bits outside the result for rounding
    ;; and left-shift by the signed version (function below).
    brtc 0f
    sbrc C3, 7
    adiw C0, 1
0:  push C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.

    mul  A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
    mul  A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    mul  A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    neg  C2

    mul  A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
    mul  A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul  A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul  A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    neg  C3

    mul  A1, B3  $  add C2, r0  $  adc C3, r1
    mul  A2, B2  $  add C2, r0  $  adc C3, r1
    mul  A3, B1  $  add C2, r0  $  adc C3, r1

    mul  A2, B3  $  add C3, r0
    mul  A3, B2  $  add C3, r0

    ;; Guard bits used in the signed version below.
    pop  GUARD
    clr  __zero_reg__
    ret
ENDF  __mulusa3_round
#endif /* L_mulusa3 */

#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    clt
    XCALL   __mulusa3_round
    ;; A posteriori sign extension of the operands
    tst     B3
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  sbrs    A3, 7
    rjmp    2f
    sub     C2, B0
    sbc     C3, B1
2:
    ;;  Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#else /* __AVR_HAVE_MUL__ */

;; Shift-and-add implementation for devices without a hardware multiplier.

#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3

#define B0  22
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3

#define C0  22
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3

;; __tmp_reg__
#define CC0  0
;; __zero_reg__
#define CC1  1
#define CC2  16
#define CC3  17

#define AA0  26
#define AA1  AA0+1
#define AA2  30
#define AA3  AA2+1

#if defined (L_mulsa3)
;;; (R25:R22)  *=  (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    push    B0
    push    B1
    push    B3
    clt
    XCALL   __mulusa3_round
    pop     r30
    ;; sign-extend B
    bst     r30, 7
    brtc    1f
    ;; A1, A0 survived in  R27:R26
    sub     C2, AA0
    sbc     C3, AA1
1:
    pop     AA1     ;; B1
    pop     AA0     ;; B0

    ;; sign-extend A.  A3 survived in  R31
    bst     AA3, 7
    brtc    2f
    sub     C2, AA0
    sbc     C3, AA1
2:
    ;;  Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif  /* L_mulsa3 */

#if defined (L_mulusa3)
;;; (R25:R22)  *=  (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
    push    CC2
    push    CC3
    ; clear result
    clr     __tmp_reg__
    wmov    CC2, CC0
    ; save multiplicand
    wmov    AA0, A0
    wmov    AA2, A2
    rjmp    3f

    ;; Loop the integral part

1:  ;; CC += A * 2^n;  n >= 0
    add  CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3

2:  ;; A <<= 1
    lsl  A0      $  rol A1      $  rol A2      $  rol A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr     B3
    ror     B2
    brcs    1b
    sbci    B3, 0
    brne    2b

    ;; Loop the fractional part
    ;; B2/B3 is 0 now, use as guard bits for rounding
    ;; Restore multiplicand
    wmov    A0, AA0
    wmov    A2, AA2
    rjmp    5f

4:  ;; CC += A:Guard * 2^n;  n < 0
    add  B3,B2  $  adc CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
5:
    ;; A:Guard >>= 1
    lsr  A3  $  ror A2  $  ror A1  $  ror A0  $  ror B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl     B0
    rol     B1
    brcs    4b
    sbci    B0, 0
    brne    5b

    ;; Save guard bits and set carry for rounding
    push    B3
    lsl     B3
    ;; Move result into place
    wmov    C2, CC2
    wmov    C0, CC0
    clr     __zero_reg__
    brtc    6f
    ;; Round iff T = 1
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
6:
    pop     GUARD
    ;; Epilogue
    pop     CC3
    pop     CC2
    ret
ENDF  __mulusa3_round
#endif  /* L_mulusa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* __AVR_HAVE_MUL__ */

#undef GUARD
/***********************************************************
    Fixed unsigned saturated Multiplication  8.8 x 8.8
***********************************************************/

#define C0  22
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define SS __tmp_reg__

#if defined (L_usmuluha3)
;; (R25:R24) = (R25:R24) *{usat} (R23:R22); saturates to 0xffff.
DEFUN __usmuluha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __umulhisi3
    tst     C3
    brne    .Lmax
    ;; Round, target is in C1..C2
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Saturate
    ldi     C2, 0xff
    ldi     C3, 0xff
    ret
ENDF  __usmuluha3
#endif /* L_usmuluha3 */

/***********************************************************
    Fixed signed saturated Multiplication  s8.7 x s8.7
***********************************************************/

#if defined (L_ssmulha3)
;; (R25:R24) = (R25:R24) *{ssat} (R23:R22); saturates to 0x8000 / 0x7fff.
DEFUN __ssmulha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __mulhisi3
    ;; Adjust decimal point
    lsl     C0
    rol     C1
    rol     C2
    brvs    .LsatC3.3
    ;; The 9 MSBs must be the same
    rol     C3
    sbc     SS, SS
    cp      C3, SS
    brne    .LsatSS
    ;; Round
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Load 0x7fff
    clr     C3
.LsatC3.3:
    ;; C3 <  0 -->  0x8000
    ;; C3 >= 0 -->  0x7fff
    mov     SS, C3
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x8000
    ;; SS =  0  -->  0x7fff
    ldi     C3, 0x7f
    ldi     C2, 0xff
    sbrc    SS, 7
    adiw    C2, 1
    ret
ENDF  __ssmulha3
#endif /* L_ssmulha3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef SS
/***********************************************************
    Fixed unsigned saturated Multiplication  16.16 x 16.16
***********************************************************/

#define C0  18
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  C0+6
#define C7  C0+7
#define SS __tmp_reg__

#if defined (L_usmulusa3)
;; R22[4] = R22[4] *{usat} R18[4]
;; Ordinary ABI function
DEFUN __usmulusa3
    ;; Widening multiply
    XCALL   __umulsidi3
    or      C7, C6
    brne    .Lmax
    ;; Round, target is in C2..C5
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret
.Lmax:
    ;; Saturate
    ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C4, C6
    ret
ENDF  __usmulusa3
#endif /* L_usmulusa3 */

/***********************************************************
    Fixed signed saturated Multiplication  s16.15 x s16.15
***********************************************************/

#if defined (L_ssmulsa3)
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
DEFUN __ssmulsa3
    ;; Widening multiply
    XCALL   __mulsidi3
    ;; Adjust decimal point
    lsl     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    brvs    .LsatC7.7
    ;; The 17 MSBs must be the same
    rol     C6
    rol     C7
    sbc     SS, SS
    cp      C6, SS
    cpc     C7, SS
    brne    .LsatSS
    ;; Round
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret

.Lmax:
    ;; Load 0x7fffffff
    clr     C7
.LsatC7.7:
    ;; C7 <  0 -->  0x80000000
    ;; C7 >= 0 -->  0x7fffffff
    lsl     C7
    sbc     SS, SS
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x80000000
    ;; SS =  0  -->  0x7fffffff
    com     SS
    mov     C4, SS
    mov     C5, C4
    wmov    C6, C4
    subi    C7, 0x80
    ret
ENDF  __ssmulsa3
#endif /* L_ssmulsa3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#undef SS
/*******************************************************
       Fractional Division 8 / 8
*******************************************************/

#define r_divd  r25     /* dividend */
#define r_quo   r24     /* quotient */
#define r_div   r22     /* divisor */
#define r_sign  __tmp_reg__

#if defined (L_divqq3)
;; Signed s.7 division: work on absolute values, fix up the sign at the end.
DEFUN __divqq3
    mov     r_sign, r_divd
    eor     r_sign, r_div
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
    XCALL   __divqq_helper
    lsr     r_quo
    sbrc    r_sign, 7   ; negate result if needed
    neg     r_quo
    ret
ENDF __divqq3
#endif  /* L_divqq3 */

#if defined (L_udivuqq3)
;; Unsigned .8 division; result is only representable for dividend < divisor.
DEFUN __udivuqq3
    cp      r_divd, r_div
    brsh    0f
    XJMP    __divqq_helper
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
0:  ldi     r_quo, 0xff
    ret
ENDF __udivuqq3
#endif  /* L_udivuqq3 */


#if defined (L_divqq_helper)
;; Restoring 8-bit fractional division core; uses __zero_reg__ as a
;; one-hot loop counter (restored to 0 when the shifted bit falls out).
DEFUN __divqq_helper
    clr     r_quo           ; clear quotient
    inc     __zero_reg__    ; init loop counter, used per shift
__udivuqq3_loop:
    lsl     r_divd          ; shift dividend
    brcs    0f              ; dividend overflow
    cp      r_divd,r_div    ; compare dividend & divisor
    brcc    0f              ; dividend >= divisor
    rol     r_quo           ; shift quotient (with CARRY)
    rjmp    __udivuqq3_cont
0:
    sub     r_divd,r_div    ; restore dividend
    lsl     r_quo           ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl     __zero_reg__    ; shift loop-counter bit
    brne    __udivuqq3_loop
    com     r_quo           ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif  /* L_divqq_helper */

#undef r_divd
#undef r_quo
#undef r_div
#undef r_sign
/*******************************************************
       Fractional Division 16 / 16
*******************************************************/
#define r_divdL 26     /* dividend Low */
#define r_divdH 27     /* dividend High */
#define r_quoL  24     /* quotient Low */
#define r_quoH  25     /* quotient High */
#define r_divL  22     /* divisor */
#define r_divH  23     /* divisor */
#define r_cnt   21

#if defined (L_divhq3)
;; Signed s.15 division; result sign is computed in r0 up front.
DEFUN __divhq3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    cp      r_divdL, r_divL
    cpc     r_divdH, r_divH
    breq    __divhq3_minus1     ; if equal return -1
    XCALL   __udivuhq3
    lsr     r_quoH
    ror     r_quoL
    brpl    9f
    ;; negate result if needed
    NEG2    r_quoL
9:
    ret
__divhq3_minus1:
    ldi     r_quoH, 0x80
    clr     r_quoL
    ret
ENDF __divhq3
#endif  /* defined (L_divhq3) */

#if defined (L_udivuhq3)
;; Unsigned .16 division; falls through into the common 16-step core.
DEFUN __udivuhq3
    sub     r_quoH,r_quoH       ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

DEFUN __udivuha3_common
    clr     r_quoL              ; clear quotient
    ldi     r_cnt,16            ; init loop counter
__udivuhq3_loop:
    rol     r_divdL             ; shift dividend (with CARRY)
    rol     r_divdH
    brcs    __udivuhq3_ep       ; dividend overflow
    cp      r_divdL,r_divL      ; compare dividend & divisor
    cpc     r_divdH,r_divH
    brcc    __udivuhq3_ep       ; dividend >= divisor
    rol     r_quoL              ; shift quotient (with CARRY)
    rjmp    __udivuhq3_cont
__udivuhq3_ep:
    sub     r_divdL,r_divL      ; restore dividend
    sbc     r_divdH,r_divH
    lsl     r_quoL              ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol     r_quoH              ; shift quotient
    dec     r_cnt               ; decrement loop counter
    brne    __udivuhq3_loop
    com     r_quoL              ; complement result
    com     r_quoH              ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif  /* defined (L_udivuhq3) */

/*******************************************************
       Fixed Division 8.8 / 8.8
*******************************************************/
#if defined (L_divha3)
;; Signed s8.7 division via the unsigned 8.8 routine.
DEFUN __divha3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    XCALL   __udivuha3
    lsr     r_quoH              ; adjust to 7 fractional bits
    ror     r_quoL
    sbrs    r0, 7               ; negate result if needed
    ret
    NEG2    r_quoL
    ret
ENDF __divha3
#endif  /* defined (L_divha3) */

#if defined (L_udivuha3)
;; Unsigned 8.8 division: rearrange the dividend so the fractional
;; core computes the 8.8 quotient.
DEFUN __udivuha3
    mov     r_quoH, r_divdL
    mov     r_divdL, r_divdH
    clr     r_divdH
    lsl     r_quoH              ; shift quotient into carry
    XJMP    __udivuha3_common   ; same as fractional after rearrange
ENDF __udivuha3
#endif  /* defined (L_udivuha3) */

#undef r_divdL
#undef r_divdH
#undef r_quoL
#undef r_quoH
#undef r_divL
#undef r_divH
#undef r_cnt
/*******************************************************
       Fixed Division 16.16 / 16.16
*******************************************************/

#define r_arg1L  24    /* arg1 gets passed already in place */
#define r_arg1H  25
#define r_arg1HL 26
#define r_arg1HH 27
#define r_divdL  26    /* dividend Low */
#define r_divdH  27
#define r_divdHL 30
#define r_divdHH 31    /* dividend High */
#define r_quoL   22    /* quotient Low */
#define r_quoH   23
#define r_quoHL  24
#define r_quoHH  25    /* quotient High */
#define r_divL   18    /* divisor Low */
#define r_divH   19
#define r_divHL  20
#define r_divHH  21    /* divisor High */
#define r_cnt __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_divsa3)
;; Signed s16.15 division via the unsigned 16.16 routine.
DEFUN __divsa3
    mov     r0, r_arg1HH
    eor     r0, r_divHH
    sbrs    r_divHH, 7
    rjmp    1f
    NEG4    r_divL
1:
    sbrs    r_arg1HH, 7
    rjmp    2f
    NEG4    r_arg1L
2:
    XCALL   __udivusa3
    lsr     r_quoHH             ; adjust to 15 fractional bits
    ror     r_quoHL
    ror     r_quoH
    ror     r_quoL
    sbrs    r0, 7               ; negate result if needed
    ret
    ;; negate r_quoL
    XJMP    __negsi2
ENDF __divsa3
#endif  /* defined (L_divsa3) */

#if defined (L_udivusa3)
;; Unsigned 16.16 division: 32-step restoring division on the
;; rearranged 16.16 dividend.
DEFUN __udivusa3
    ldi     r_divdHL, 32        ; init loop counter
    mov     r_cnt, r_divdHL
    clr     r_divdHL
    clr     r_divdHH
    wmov    r_quoL, r_divdHL
    lsl     r_quoHL             ; shift quotient into carry
    rol     r_quoHH
__udivusa3_loop:
    rol     r_divdL             ; shift dividend (with CARRY)
    rol     r_divdH
    rol     r_divdHL
    rol     r_divdHH
    brcs    __udivusa3_ep       ; dividend overflow
    cp      r_divdL,r_divL      ; compare dividend & divisor
    cpc     r_divdH,r_divH
    cpc     r_divdHL,r_divHL
    cpc     r_divdHH,r_divHH
    brcc    __udivusa3_ep       ; dividend >= divisor
    rol     r_quoL              ; shift quotient (with CARRY)
    rjmp    __udivusa3_cont
__udivusa3_ep:
    sub     r_divdL,r_divL      ; restore dividend
    sbc     r_divdH,r_divH
    sbc     r_divdHL,r_divHL
    sbc     r_divdHH,r_divHH
    lsl     r_quoL              ; shift quotient (without CARRY)
__udivusa3_cont:
    rol     r_quoH              ; shift quotient
    rol     r_quoHL
    rol     r_quoHH
    dec     r_cnt               ; decrement loop counter
    brne    __udivusa3_loop
    com     r_quoL              ; complement result
    com     r_quoH              ; because C flag was complemented in loop
    com     r_quoHL
    com     r_quoHH
    ret
ENDF __udivusa3
#endif  /* defined (L_udivusa3) */

#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH
#undef r_divdL
#undef r_divdH
#undef r_divdHL
#undef r_divdHH
#undef r_quoL
#undef r_quoH
#undef r_quoHL
#undef r_quoHH
#undef r_divL
#undef r_divH
#undef r_divHL
#undef r_divHH
#undef r_cnt
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24

#if defined (L_ssabs_1)
;; Signed saturated 8-bit absolute value:  |-0x80| saturates to 0x7f.
DEFUN __ssabs_1
    sbrs    A0, 7
    ret
    neg     A0
    sbrc    A0, 7       ; still negative: input was -0x80
    dec     A0          ; -0x80 --> 0x7f
    ret
ENDF __ssabs_1
#endif /* L_ssabs_1 */

#undef A0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24
#define A1  A0+1

#if defined (L_ssneg_2)
;; Signed saturated 16-bit negation:  -(-0x8000) saturates to 0x7fff.
DEFUN __ssneg_2
    NEG2    A0
    brvc    0f          ; V set only for input -0x8000
    sbiw    A0, 1       ; 0x8000 --> 0x7fff
0:  ret
ENDF __ssneg_2
#endif /* L_ssneg_2 */

#if defined (L_ssabs_2)
;; Signed saturated 16-bit absolute value.
DEFUN __ssabs_2
    sbrs    A1, 7
    ret
    XJMP    __ssneg_2
ENDF __ssabs_2
#endif /* L_ssabs_2 */

#undef A0
#undef A1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  22
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3

#if defined (L_ssneg_4)
;; Signed saturated 32-bit negation:  -(INT32_MIN) saturates to INT32_MAX.
DEFUN __ssneg_4
    XCALL   __negsi2
    brvc    0f          ; V set only for input 0x80000000
    ldi     A3, 0x7f
    ldi     A2, 0xff
    ldi     A1, 0xff
    ldi     A0, 0xff
0:  ret
ENDF __ssneg_4
#endif /* L_ssneg_4 */

#if defined (L_ssabs_4)
;; Signed saturated 32-bit absolute value.
DEFUN __ssabs_4
    sbrs    A3, 7
    ret
    XJMP    __ssneg_4
ENDF __ssabs_4
#endif /* L_ssabs_4 */

#undef A0
#undef A1
#undef A2
#undef A3
1263\f
1264;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1265;; Saturation, 8 Bytes
1266;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1267
1268;; First Argument and Return Register
1269#define A0 18
1270#define A1 A0+1
1271#define A2 A0+2
1272#define A3 A0+3
1273#define A4 A0+4
1274#define A5 A0+5
1275#define A6 A0+6
1276#define A7 A0+7
1277
1278#if defined (L_clr_8)
1279FALIAS __usneguta2
1280FALIAS __usneguda2
1281FALIAS __usnegudq2
1282
1283;; Clear Carry and all Bytes
1284DEFUN __clr_8
1285 ;; Clear Carry and set Z
1286 sub A7, A7
1287 ;; FALLTHRU
1288ENDF __clr_8
1289;; Propagate Carry to all Bytes, Carry unaltered
1290DEFUN __sbc_8
1291 sbc A7, A7
1292 sbc A6, A6
1293 wmov A4, A6
1294 wmov A2, A6
1295 wmov A0, A6
1296 ret
1297ENDF __sbc_8
1298#endif /* L_clr_8 */
1299
1300#if defined (L_ssneg_8)
;; Signed saturated 64-bit negate:  A[] = -A[].
;; The only overflowing input is INT64_MIN (V set by __negdi2),
;; clamped to INT64_MAX below.
1301FALIAS __ssnegta2
1302FALIAS __ssnegda2
1303FALIAS __ssnegdq2
1304
1305DEFUN __ssneg_8
1306 XCALL __negdi2
1307 brvc 0f
1308 ;; A[] = 0x7fffffffffffffff = INT64_MAX
;; Set C, let __sbc_8 fill every byte with 0xff, then fix the top byte.
1309 sec
1310 XCALL __sbc_8
1311 ldi A7, 0x7f
13120: ret
1313ENDF __ssneg_8
1314#endif /* L_ssneg_8 */
1315
1316#if defined (L_ssabs_8)
;; Signed saturated 64-bit absolute value in A[]
;; (INT64_MIN -> INT64_MAX via the saturated negate).
1317FALIAS __ssabsta2
1318FALIAS __ssabsda2
1319FALIAS __ssabsdq2
1320
1321DEFUN __ssabs_8
;; Return as-is when the sign bit A7.7 is clear.
1322 sbrs A7, 7
1323 ret
1324 XJMP __ssneg_8
1325ENDF __ssabs_8
1326#endif /* L_ssabs_8 */
1327
1328;; Second Argument
1329#define B0 10
1330#define B1 B0+1
1331#define B2 B0+2
1332#define B3 B0+3
1333#define B4 B0+4
1334#define B5 B0+5
1335#define B6 B0+6
1336#define B7 B0+7
1337
1338#if defined (L_usadd_8)
;; Unsigned saturated 64-bit addition:  A[] += B[],
;; clamped to UINT64_MAX on carry-out.
1339FALIAS __usadduta3
1340FALIAS __usadduda3
1341FALIAS __usaddudq3
1342
1343DEFUN __usadd_8
;; Plain 64-bit add; C is set on unsigned overflow.
1344 XCALL __adddi3
1345 brcs 0f
1346 ret
e13d9d5a
GJL
13470: ;; A[] = 0xffffffffffffffff = UINT64_MAX
;; C is still set here, so __sbc_8 fills every byte with 0xff.
1348 XJMP __sbc_8
51526856
GJL
1349ENDF __usadd_8
1350#endif /* L_usadd_8 */
1351
1352#if defined (L_ussub_8)
;; Unsigned saturated 64-bit subtraction:  A[] -= B[],
;; clamped to 0 on borrow.
1353FALIAS __ussubuta3
1354FALIAS __ussubuda3
1355FALIAS __ussubudq3
1356
1357DEFUN __ussub_8
;; Plain 64-bit subtract; C is set when B > A (underflow).
1358 XCALL __subdi3
1359 brcs 0f
1360 ret
e13d9d5a
GJL
13610: ;; A[] = 0
1362 XJMP __clr_8
51526856
GJL
1363ENDF __ussub_8
1364#endif /* L_ussub_8 */
1365
1366#if defined (L_ssadd_8)
;; Signed saturated 64-bit addition:  A[] += B[].
;; On signed overflow (V set) the result is clamped towards the
;; sign of B: overflow adding a non-negative B gives INT64_MAX,
;; adding a negative B gives INT64_MIN.
1367FALIAS __ssaddta3
1368FALIAS __ssaddda3
1369FALIAS __ssadddq3
1370
1371DEFUN __ssadd_8
51526856
GJL
1372 XCALL __adddi3
1373 brvc 0f
e13d9d5a 1374 ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
51526856
GJL
;; C = 1 iff B7 < 0x80, i.e. B is non-negative.
1375 cpi B7, 0x80
;; A[] = C ? 0xff...ff : 0x00...00
1376 XCALL __sbc_8
;; Top byte: 0xff - 0x80 = 0x7f (MAX) resp. 0x00 - 0x80 = 0x80 (MIN).
1377 subi A7, 0x80
13780: ret
1379ENDF __ssadd_8
1380#endif /* L_ssadd_8 */
1381
1382#if defined (L_sssub_8)
;; Signed saturated 64-bit subtraction:  A[] -= B[].
;; On signed overflow (V set) the result is clamped away from B:
;; subtracting a negative B gives INT64_MAX, a non-negative B
;; gives INT64_MIN.
1383FALIAS __sssubta3
1384FALIAS __sssubda3
1385FALIAS __sssubdq3
1386
1387DEFUN __sssub_8
1388 XCALL __subdi3
1389 brvc 0f
e13d9d5a 1390 ;; A = (B < 0) ? INT64_MAX : INT64_MIN
51526856
GJL
;; C = 1 iff 0x7f < B7, i.e. B is negative.
1391 ldi A7, 0x7f
1392 cp A7, B7
;; A[] = C ? 0xff...ff : 0x00...00
1393 XCALL __sbc_8
;; Top byte: 0xff - 0x80 = 0x7f (MAX) resp. 0x00 - 0x80 = 0x80 (MIN).
1394 subi A7, 0x80
13950: ret
1396ENDF __sssub_8
1397#endif /* L_sssub_8 */
1398
1399#undef A0
1400#undef A1
1401#undef A2
1402#undef A3
1403#undef A4
1404#undef A5
1405#undef A6
1406#undef A7
1407#undef B0
1408#undef B1
1409#undef B2
1410#undef B3
1411#undef B4
1412#undef B5
1413#undef B6
1414#undef B7
85d768f3
GJL
1415
1416\f
1417;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1418;; Rounding Helpers
1419;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1420
1421#ifdef L_mask1
1422
1423#define AA 24
1424#define CC 25
1425
1426;; R25 = 1 << (R24 & 7)
1427;; CC = 1 << (AA & 7)
1428;; Clobbers: None
;; Builds the power of two branch-free from the three low bits
;; of AA, multiplying factors 4, 2 and 16 into CC as needed.
1429DEFUN __mask1
1430 ;; CC = 4 ^ AA.1 (i.e. 4 if AA.1 is set, else 1)
1431 ldi CC, 1 << 2
1432 sbrs AA, 1
1433 ldi CC, 1 << 0
1434 ;; CC *= 2 ^ AA.0
1435 sbrc AA, 0
1436 lsl CC
1437 ;; CC *= 2 ^ AA.2
;; swap acts as << 4 here: CC <= 8, so only the low nibble is used.
1438 sbrc AA, 2
1439 swap CC
1440 ret
1441ENDF __mask1
1442
1443#undef AA
1444#undef CC
1445#endif /* L_mask1 */
1446
1447;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1448
1449;; The rounding point. Any bits smaller than
1450;; 2^{-RP} will be cleared.
1451#define RP R24
1452
1453#define A0 22
1454#define A1 A0 + 1
1455
1456#define C0 24
1457#define C1 C0 + 1
1458
1459;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1460;; Rounding, 1 Byte
1461;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1462
1463#ifdef L_roundqq3
1464
1465;; R24 = round (R22, R24)
1466;; Clobbers: R22, __tmp_reg__
;; Round the signed Q-format byte in A0 at rounding point RP by
;; adding half an ulp (2^{-RP-1}) with signed saturation, then
;; clearing all bits below 2^{-RP}.  Caller's R25 is preserved.
1467DEFUN __roundqq3
;; Stash R25 (C1): __mask1 returns its result there.
1468 mov __tmp_reg__, C1
;; RP := FBIT-1 - RP = bit index of the half-ulp addend.
1469 subi RP, __QQ_FBIT__ - 1
1470 neg RP
1471 ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
1472 XCALL __mask1
1473 mov C0, C1
1474 ;; Add-Saturate 2^{-RP-1}
1475 add A0, C0
1476 brvc 0f
02371798
GJL
;; Signed overflow: saturate to 0x7f = QQ maximum.
1477 ldi C0, 0x7f
1478 rjmp 9f
85d768f3
GJL
14790: ;; Mask out bits beyond RP
;; C0 = 2^{-RP}; -C0 then has ones at and above the rounding point.
1480 lsl C0
1481 neg C0
1482 and C0, A0
02371798 14839: mov C1, __tmp_reg__
85d768f3
GJL
1484 ret
1485ENDF __roundqq3
1486#endif /* L_roundqq3 */
1487
1488#ifdef L_rounduqq3
1489
1490;; R24 = round (R22, R24)
1491;; Clobbers: R22, __tmp_reg__
;; Unsigned variant of __roundqq3: add half an ulp (2^{-RP-1})
;; with unsigned saturation (carry), then clear the bits below
;; 2^{-RP}.  Caller's R25 is preserved.
1492DEFUN __rounduqq3
;; Stash R25 (C1): __mask1 returns its result there.
1493 mov __tmp_reg__, C1
;; RP := FBIT-1 - RP = bit index of the half-ulp addend.
1494 subi RP, __UQQ_FBIT__ - 1
1495 neg RP
1496 ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
1497 XCALL __mask1
1498 mov C0, C1
1499 ;; Add-Saturate 2^{-RP-1}
1500 add A0, C0
1501 brcc 0f
02371798
GJL
;; Carry-out: saturate to 0xff = UQQ maximum.
1502 ldi C0, 0xff
1503 rjmp 9f
85d768f3
GJL
15040: ;; Mask out bits beyond RP
;; C0 = 2^{-RP}; -C0 then has ones at and above the rounding point.
1505 lsl C0
1506 neg C0
1507 and C0, A0
02371798 15089: mov C1, __tmp_reg__
85d768f3
GJL
1509 ret
1510ENDF __rounduqq3
1511#endif /* L_rounduqq3 */
1512
1513;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1514;; Rounding, 2 Bytes
1515;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1516
1517#ifdef L_addmask_2
1518
1519;; [ R25:R24 = 1 << (R24 & 15)
1520;; R23:R22 += 1 << (R24 & 15) ]
1521;; SREG is set according to the addition
1522DEFUN __addmask_2
1523 ;; R25 = 1 << (R24 & 7)
1524 XCALL __mask1
;; C0 = (RP < 8) ? 0xff : 0x00 — branch-free select mask.
1525 cpi RP, 1 << 3
1526 sbc C0, C0
1527 ;; Move the mask from C1 down into C0 if RP.3 is not set (RP < 8)
1528 and C0, C1
1529 eor C1, C0
1530 ;; Finally, add the power-of-two: A[] += C[]
1531 add A0, C0
1532 adc A1, C1
1533 ret
1534ENDF __addmask_2
1535#endif /* L_addmask_2 */
1536
1537#ifdef L_round_s2
1538
1539;; R25:R24 = round (R23:R22, R24)
1540;; Clobbers: R23, R22
;; __roundhq3 pre-biases RP by the fractional-bit difference and
;; falls through into __roundha3.
1541DEFUN __roundhq3
1542 subi RP, __HQ_FBIT__ - __HA_FBIT__
1543ENDF __roundhq3
;; Add half an ulp then saturate/mask in __round_s2_const.
1544DEFUN __roundha3
;; RP := FBIT-1 - RP = bit index of the half-ulp addend.
1545 subi RP, __HA_FBIT__ - 1
1546 neg RP
1547 ;; [ R25:R24 = 1 << (FBIT-1 - RP)
1548 ;; R23:R22 += 1 << (FBIT-1 - RP) ]
1549 XCALL __addmask_2
1550 XJMP __round_s2_const
1551ENDF __roundha3
1552
1553#endif /* L_round_s2 */
1554
1555#ifdef L_round_u2
1556
1557;; R25:R24 = round (R23:R22, R24)
1558;; Clobbers: R23, R22
;; __rounduhq3 pre-biases RP by the fractional-bit difference and
;; falls through into __rounduha3.
1559DEFUN __rounduhq3
1560 subi RP, __UHQ_FBIT__ - __UHA_FBIT__
1561ENDF __rounduhq3
;; Add half an ulp then saturate/mask in __round_u2_const.
1562DEFUN __rounduha3
;; RP := FBIT-1 - RP = bit index of the half-ulp addend.
1563 subi RP, __UHA_FBIT__ - 1
1564 neg RP
1565 ;; [ R25:R24 = 1 << (FBIT-1 - RP)
1566 ;; R23:R22 += 1 << (FBIT-1 - RP) ]
1567 XCALL __addmask_2
1568 XJMP __round_u2_const
1569ENDF __rounduha3
1570
1571#endif /* L_round_u2 */
1572
1573
1574#ifdef L_round_2_const
1575
1576;; Helpers for 2 byte wide rounding
1577
;; Entered with SREG still reflecting __addmask_2's addition and
;; C[] = 2^{-RP-1}.  Signed entry checks V; on overflow the result
;; saturates to 0x7fff (INT16_MAX).
1578DEFUN __round_s2_const
1579 brvc 2f
02371798 1580 ldi C1, 0x7f
85d768f3
GJL
1581 rjmp 1f
1582 ;; FALLTHRU (Barrier)
1583ENDF __round_s2_const
1584
;; Unsigned entry checks C; on carry-out the result saturates to
;; 0xffff (UHA/UHQ maximum).
1585DEFUN __round_u2_const
1586 brcc 2f
02371798 1587 ldi C1, 0xff
85d768f3 15881:
02371798
GJL
1589 ldi C0, 0xff
1590 rjmp 9f
85d768f3
GJL
15912:
1592 ;; Saturation is performed now.
1593 ;; Currently, we have C[] = 2^{-RP-1}
1594 ;; C[] = 2^{-RP}
1595 lsl C0
1596 rol C1
1597 ;; C[] = -C[]: ones at and above the rounding point.
1598 NEG2 C0
1599 ;; Clear the bits beyond the rounding point.
1600 and C0, A0
1601 and C1, A1
16029: ret
85d768f3
GJL
1603ENDF __round_u2_const
1604
1605#endif /* L_round_2_const */
1606
1607#undef A0
1608#undef A1
1609#undef C0
1610#undef C1
1611
1612;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1613;; Rounding, 4 Bytes
1614;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1615
1616#define A0 18
1617#define A1 A0 + 1
1618#define A2 A0 + 2
1619#define A3 A0 + 3
1620
1621#define C0 22
1622#define C1 C0 + 1
1623#define C2 C0 + 2
1624#define C3 C0 + 3
1625
1626#ifdef L_addmask_4
1627
1628;; [ R25:R22 = 1 << (R24 & 31)
1629;; R21:R18 += 1 << (R24 & 31) ]
1630;; SREG is set according to the addition
;; __mask1 leaves the one-hot byte in C3; two conditional moves
;; (branch-free, via cpi/sbc-built 0x00/0xff select masks) shift it
;; down to the byte addressed by RP's upper bits.
1631DEFUN __addmask_4
1632 ;; R25 = 1 << (R24 & 7)
1633 XCALL __mask1
;; C1:C0 = (RP < 16) ? 0xffff : 0x0000 — computed before flags change.
1634 cpi RP, 1 << 4
1635 sbc C0, C0
1636 sbc C1, C1
1637 ;; Swap C2 with C3 if RP.3 is not set
;; NOTE(review): cpi RP, 1<<3 yields C = (RP < 8), which matches
;; "RP.3 not set" only while RP < 16; for RP in [16,24) the mask
;; appears to stay in C3 instead of moving to C2 — verify.
1638 cpi RP, 1 << 3
1639 sbc C2, C2
1640 and C2, C3
1641 eor C3, C2
1642 ;; Swap C3:C2 with C1:C0 if RP.4 is not set
1643 and C0, C2 $ eor C2, C0
1644 and C1, C3 $ eor C3, C1
1645 ;; Finally, add the power-of-two: A[] += C[]
1646 add A0, C0
1647 adc A1, C1
1648 adc A2, C2
1649 adc A3, C3
1650 ret
1651ENDF __addmask_4
1652#endif /* L_addmask_4 */
1653
1654#ifdef L_round_s4
1655
1656;; R25:R22 = round (R21:R18, R24)
1657;; Clobbers: R18...R21
;; __roundsq3 pre-biases RP by the fractional-bit difference and
;; falls through into __roundsa3.
1658DEFUN __roundsq3
1659 subi RP, __SQ_FBIT__ - __SA_FBIT__
1660ENDF __roundsq3
;; Add half an ulp then saturate/mask in __round_s4_const.
1661DEFUN __roundsa3
;; RP := FBIT-1 - RP = bit index of the half-ulp addend.
1662 subi RP, __SA_FBIT__ - 1
1663 neg RP
1664 ;; [ R25:R22 = 1 << (FBIT-1 - RP)
1665 ;; R21:R18 += 1 << (FBIT-1 - RP) ]
1666 XCALL __addmask_4
1667 XJMP __round_s4_const
1668ENDF __roundsa3
1669
1670#endif /* L_round_s4 */
1671
1672#ifdef L_round_u4
1673
1674;; R25:R22 = round (R21:R18, R24)
1675;; Clobbers: R18...R21
;; __roundusq3 pre-biases RP by the fractional-bit difference and
;; falls through into __roundusa3.
1676DEFUN __roundusq3
1677 subi RP, __USQ_FBIT__ - __USA_FBIT__
1678ENDF __roundusq3
;; Add half an ulp then saturate/mask in __round_u4_const.
1679DEFUN __roundusa3
;; RP := FBIT-1 - RP = bit index of the half-ulp addend.
1680 subi RP, __USA_FBIT__ - 1
1681 neg RP
1682 ;; [ R25:R22 = 1 << (FBIT-1 - RP)
1683 ;; R21:R18 += 1 << (FBIT-1 - RP) ]
1684 XCALL __addmask_4
1685 XJMP __round_u4_const
1686ENDF __roundusa3
1687
1688#endif /* L_round_u4 */
1689
1690
1691#ifdef L_round_4_const
1692
1693;; Helpers for 4 byte wide rounding
1694
;; Entered with SREG still reflecting __addmask_4's addition and
;; C[] = 2^{-RP-1}.  Signed entry checks V; on overflow the result
;; saturates to 0x7fffffff (INT32_MAX).
1695DEFUN __round_s4_const
1696 brvc 2f
02371798 1697 ldi C3, 0x7f
85d768f3
GJL
1698 rjmp 1f
1699 ;; FALLTHRU (Barrier)
1700ENDF __round_s4_const
1701
;; Unsigned entry checks C; on carry-out the result saturates to
;; 0xffffffff (USA/USQ maximum).
1702DEFUN __round_u4_const
1703 brcc 2f
02371798 1704 ldi C3, 0xff
85d768f3 17051:
02371798
GJL
1706 ldi C2, 0xff
1707 ldi C1, 0xff
1708 ldi C0, 0xff
1709 rjmp 9f
85d768f3
GJL
17102:
1711 ;; Saturation is performed now.
1712 ;; Currently, we have C[] = 2^{-RP-1}
1713 ;; C[] = 2^{-RP}
1714 lsl C0
1715 rol C1
1716 rol C2
1717 rol C3
;; C[] = -C[]: ones at and above the rounding point.
1718 XCALL __negsi2
1719 ;; Clear the bits beyond the rounding point.
1720 and C0, A0
1721 and C1, A1
1722 and C2, A2
1723 and C3, A3
02371798 17249: ret
85d768f3
GJL
1725ENDF __round_u4_const
1726
1727#endif /* L_round_4_const */
1728
1729#undef A0
1730#undef A1
1731#undef A2
1732#undef A3
1733#undef C0
1734#undef C1
1735#undef C2
1736#undef C3
1737
1738#undef RP
1739
1740;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1741;; Rounding, 8 Bytes
1742;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1743
1744#define RP 16
1745#define FBITm1 31
1746
1747#define C0 18
1748#define C1 C0 + 1
1749#define C2 C0 + 2
1750#define C3 C0 + 3
1751#define C4 C0 + 4
1752#define C5 C0 + 5
1753#define C6 C0 + 6
1754#define C7 C0 + 7
1755
1756#define A0 16
1757#define A1 17
1758#define A2 26
1759#define A3 27
1760#define A4 28
1761#define A5 29
1762#define A6 30
1763#define A7 31
1764
1765
1766#ifdef L_rounddq3
1767;; R25:R18 = round (R25:R18, R16)
1768;; Clobbers: ABI
;; T cleared: __round_x8 performs signed saturation.
1769DEFUN __rounddq3
1770 ldi FBITm1, __DQ_FBIT__ - 1
1771 clt
1772 XJMP __round_x8
1773ENDF __rounddq3
1774#endif /* L_rounddq3 */
1775
1776#ifdef L_roundudq3
1777;; R25:R18 = round (R25:R18, R16)
1778;; Clobbers: ABI
;; T set: __round_x8 performs unsigned saturation.
1779DEFUN __roundudq3
1780 ldi FBITm1, __UDQ_FBIT__ - 1
1781 set
1782 XJMP __round_x8
1783ENDF __roundudq3
1784#endif /* L_roundudq3 */
1785
1786#ifdef L_roundda3
1787;; R25:R18 = round (R25:R18, R16)
1788;; Clobbers: ABI
;; T cleared: __round_x8 performs signed saturation.
1789DEFUN __roundda3
1790 ldi FBITm1, __DA_FBIT__ - 1
1791 clt
1792 XJMP __round_x8
1793ENDF __roundda3
1794#endif /* L_roundda3 */
1795
1796#ifdef L_rounduda3
1797;; R25:R18 = round (R25:R18, R16)
1798;; Clobbers: ABI
;; T set: __round_x8 performs unsigned saturation.
1799DEFUN __rounduda3
1800 ldi FBITm1, __UDA_FBIT__ - 1
1801 set
1802 XJMP __round_x8
1803ENDF __rounduda3
1804#endif /* L_rounduda3 */
1805
1806#ifdef L_roundta3
1807;; R25:R18 = round (R25:R18, R16)
1808;; Clobbers: ABI
;; T cleared: __round_x8 performs signed saturation.
1809DEFUN __roundta3
1810 ldi FBITm1, __TA_FBIT__ - 1
1811 clt
1812 XJMP __round_x8
1813ENDF __roundta3
1814#endif /* L_roundta3 */
1815
1816#ifdef L_rounduta3
1817;; R25:R18 = round (R25:R18, R16)
1818;; Clobbers: ABI
;; T set: __round_x8 performs unsigned saturation.
1819DEFUN __rounduta3
1820 ldi FBITm1, __UTA_FBIT__ - 1
1821 set
1822 XJMP __round_x8
1823ENDF __rounduta3
1824#endif /* L_rounduta3 */
1825
1826
1827#ifdef L_round_x8
;; Common 8-byte rounding worker.
;; In:  C[] = R25:R18 value, RP = R16 rounding point,
;;      FBITm1 = R17 = FBIT-1, T flag: set = unsigned, clear = signed.
;; Out: C[] = value rounded at 2^{-RP}, saturated on overflow.
;; R16/R17/R28/R29 are call-saved and restored in the epilogue.
1828DEFUN __round_x8
1829 push r16
1830 push r17
1831 push r28
1832 push r29
1833 ;; Compute log2 of addend from rounding point
;; RP := FBITm1 - RP
1834 sub RP, FBITm1
1835 neg RP
1836 ;; Move input to work register A[]
;; A0 is r16 = RP, still needed as __ashldi3's shift count,
;; so the low input byte is parked on the stack for now.
1837 push C0
1838 mov A1, C1
1839 wmov A2, C2
1840 wmov A4, C4
1841 wmov A6, C6
1842 ;; C[] = 1 << (FBIT-1 - RP)
1843 XCALL __clr_8
1844 inc C0
1845 XCALL __ashldi3
;; Retrieve the parked low input byte into A0 (r16).
1846 pop A0
1847 ;; A[] += C[]
1848 add A0, C0
1849 adc A1, C1
1850 adc A2, C2
1851 adc A3, C3
1852 adc A4, C4
1853 adc A5, C5
1854 adc A6, C6
1855 adc A7, C7
1856 brts 1f
1857 ;; Signed
1858 brvc 3f
1859 ;; Signed overflow: A[] = 0x7f...
;; V is known set here, so this branch is always taken (skips 1:).
1860 brvs 2f
18611: ;; Unsigned
1862 brcc 3f
1863 ;; Unsigned overflow: A[] = 0xff...
02371798
GJL
18642: ldi C7, 0xff
1865 ldi C6, 0xff
1866 wmov C0, C6
1867 wmov C2, C6
1868 wmov C4, C6
;; Load T into C7.7: unsigned keeps 0xff, signed clears it to 0x7f.
1869 bld C7, 7
1870 rjmp 9f
85d768f3
GJL
18713:
1872 ;; C[] = -C[] - C[]
;; i.e. -(2 * 2^{-RP-1}) = -2^{-RP}: ones at/above the rounding point.
;; r16 (= A0) doubles as __ashldi3's shift count, so park A0 again.
1873 push A0
1874 ldi r16, 1
1875 XCALL __ashldi3
1876 pop A0
1877 XCALL __negdi2
1878 ;; Clear the bits beyond the rounding point.
1879 and C0, A0
1880 and C1, A1
1881 and C2, A2
1882 and C3, A3
1883 and C4, A4
1884 and C5, A5
1885 and C6, A6
1886 and C7, A7
02371798 18879: ;; Epilogue
85d768f3
GJL
1888 pop r29
1889 pop r28
1890 pop r17
1891 pop r16
1892 ret
1893ENDF __round_x8
1894
1895#endif /* L_round_x8 */
1896
1897#undef A0
1898#undef A1
1899#undef A2
1900#undef A3
1901#undef A4
1902#undef A5
1903#undef A6
1904#undef A7
1905
1906#undef C0
1907#undef C1
1908#undef C2
1909#undef C3
1910#undef C4
1911#undef C5
1912#undef C6
1913#undef C7
1914
1915#undef RP
1916#undef FBITm1
1917
1918
1919;; Supply implementations / symbols for the bit-banging functions
1920;; __builtin_avr_bitsfx and __builtin_avr_fxbits
1921#ifdef L_ret
;; Trivial return stub: a named symbol the compiler can branch to
;; for the bit-banging built-ins mentioned above.
1922DEFUN __ret
1923 ret
1924ENDF __ret
1925#endif /* L_ret */
c1dd9790
JR
1926
1927#endif /* if not __AVR_TINY__ */