;; (gitweb page header from the scrape, kept for provenance, commented out)
;; git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/avr/lib1funcs-fixed.S
;; Commit: "Update copyright years."  File: libgcc/config/avr/lib1funcs-fixed.S
/* -*- Mode: Asm -*- */
;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
;;                Georg-Johann Lay (avr@gjlay.de)

;; This file is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.

;; In addition to the permissions in the GNU General Public License, the
;; Free Software Foundation gives you unlimited permission to link the
;; compiled version of this file into combinations with other programs,
;; and to distribute those combinations without any restriction coming
;; from the use of this file.  (The General Public License restrictions
;; do apply in other respects; for example, they cover modification of
;; the file, and distribution when not linked into a combine
;; executable.)

;; This file is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined __AVR_TINY__
;; NOTE(review): on reduced (TINY) cores the scratch / zero registers
;; live in r16/r17 instead of r0/r1 — presumably because r0..r15 are
;; not available there; confirm against avr-gcc register conventions.
#define __zero_reg__ r17
#define __tmp_reg__ r16
#else
#define __zero_reg__ r1
#define __tmp_reg__ r0
#endif

.section .text.libgcc.fixed, "ax", @progbits

#ifndef __AVR_TINY__

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (L_fractqqsf)
;; float __fractqqsf (QQ): widen s.7 operand (R24) and tail-call the
;; SA -> SF conversion below.
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Sign-extend (R24 still holds the original value here)
    lsl     r24
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fractqqsf
#endif  /* L_fractqqsf */

#if defined (L_fractuqqsf)
;; float __fractuqqsf (UQQ): widen unsigned .8 operand (R24), tail-call
;; the USA -> SF conversion.
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuqqsf
#endif  /* L_fractuqqsf */

#if defined (L_fracthqsf)
;; float __fracthqsf (HQ): widen s.15 operand (R25:R24).
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov    22, 24
    ;; Sign-extend (R25 still holds the original high byte)
    lsl     r25
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fracthqsf
#endif  /* L_fracthqsf */

#if defined (L_fractuhqsf)
;; float __fractuhqsf (UHQ): widen unsigned .16 operand (R25:R24).
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov    22, 24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhqsf
#endif  /* L_fractuhqsf */

#if defined (L_fracthasf)
;; float __fracthasf (HA): widen s8.7 operand (R25:R24).
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Sign-extend (R25 still holds the original high byte)
    lsl     r25
    sbc     r25, r25
    XJMP    __fractsasf
ENDF __fracthasf
#endif  /* L_fracthasf */

#if defined (L_fractuhasf)
;; float __fractuhasf (UHA): widen unsigned 8.8 operand (R25:R24).
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Zero-extend
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhasf
#endif  /* L_fractuhasf */

#if defined (L_fractsqsf)
;; float __fractsqsf (SQ): convert via __floatsisf, then rescale.
DEFUN __fractsqsf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (31)
    sbci    r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif  /* L_fractsqsf */

#if defined (L_fractusqsf)
;; float __fractusqsf (USQ): convert via __floatunsisf, then rescale.
DEFUN __fractusqsf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif  /* L_fractusqsf */

#if defined (L_fractsasf)
;; float __fractsasf (SA): convert via __floatsisf, then rescale.
DEFUN __fractsasf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (15)
    sbci    r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif  /* L_fractsasf */

#if defined (L_fractusasf)
;; float __fractusasf (USA): convert via __floatunsisf, then rescale.
DEFUN __fractusasf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif  /* L_fractusasf */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#if defined (L_fractsfqq)
;; QQ __fractsfqq (float): scale the float by adjusting its exponent,
;; then truncate via __fixsfsi.
DEFUN __fractsfqq
    ;; Multiply with 2^{24+7} to get a QQ result in r25
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XCALL   __fixsfsi
    mov     r24, r25
    ret
ENDF __fractsfqq
#endif  /* L_fractsfqq */

#if defined (L_fractsfuqq)
;; UQQ __fractsfuqq (float)
DEFUN __fractsfuqq
    ;; Multiply with 2^{24+8} to get a UQQ result in r25
    subi    r25, exp_hi (-32)
    XCALL   __fixunssfsi
    mov     r24, r25
    ret
ENDF __fractsfuqq
#endif  /* L_fractsfuqq */

#if defined (L_fractsfha)
;; HA __fractsfha (float)
DEFUN __fractsfha
    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
    subi    r24, exp_lo (-23)
    sbci    r25, exp_hi (-23)
    XJMP    __fixsfsi
ENDF __fractsfha
#endif  /* L_fractsfha */

#if defined (L_fractsfuha)
;; UHA __fractsfuha (float)
DEFUN __fractsfuha
    ;; Multiply with 2^24 to get a UHA result in r25:r24
    subi    r25, exp_hi (-24)
    XJMP    __fixunssfsi
ENDF __fractsfuha
#endif  /* L_fractsfuha */

#if defined (L_fractsfhq)
FALIAS __fractsfsq

;; HQ __fractsfhq (float); also serves as SQ __fractsfsq (float).
DEFUN __fractsfhq
    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
    ;; resp. with 2^31 to get a SQ result in r25:r22
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XJMP    __fixsfsi
ENDF __fractsfhq
#endif  /* L_fractsfhq */

#if defined (L_fractsfuhq)
FALIAS __fractsfusq

;; UHQ __fractsfuhq (float); also serves as USQ __fractsfusq (float).
DEFUN __fractsfuhq
    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
    ;; resp. with 2^32 to get a USQ result in r25:r22
    subi    r25, exp_hi (-32)
    XJMP    __fixunssfsi
ENDF __fractsfuhq
#endif  /* L_fractsfuhq */

#if defined (L_fractsfsa)
;; SA __fractsfsa (float)
DEFUN __fractsfsa
    ;; Multiply with 2^15 to get a SA result in r25:r22
    subi    r24, exp_lo (-15)
    sbci    r25, exp_hi (-15)
    XJMP    __fixsfsi
ENDF __fractsfsa
#endif  /* L_fractsfsa */

#if defined (L_fractsfusa)
;; USA __fractsfusa (float)
DEFUN __fractsfusa
    ;; Multiply with 2^16 to get a USA result in r25:r22
    subi    r25, exp_hi (-16)
    XJMP    __fixunssfsi
ENDF __fractsfusa
#endif  /* L_fractsfusa */


;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.  */

/*******************************************************
    Fractional Multiplication  8 x 8  without MUL
*******************************************************/

#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding:  ???
DEFUN __mulqq3
    XCALL   __fmuls
    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
    ;; The only input that can produce -1 is (-1)^2.
    dec     r23
    brvs    0f
    inc     r23
0:  ret
ENDF __mulqq3
#endif  /* L_mulqq3 && ! HAVE_MUL */

/*******************************************************
    Fractional Multiply  .16 x .16  with and without MUL
*******************************************************/

#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
DEFUN __mulhq3
    XCALL   __mulhisi3
    ;; Shift result into place
    lsl     r23
    rol     r24
    rol     r25
    brvs    1f
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
    ldi     r24, lo8 (0x7fff)
    ldi     r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif /* defined (L_mulhq3) */

#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
DEFUN __muluhq3
    XCALL   __umulhisi3
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
ENDF __muluhq3
#endif  /* L_muluhq3 */


/*******************************************************
    Fixed Multiply  8.8 x 8.8  with and without MUL
*******************************************************/

#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
DEFUN __mulha3
    XCALL   __mulhisi3
    ;; Adjust decimal point (one fewer fractional bit than unsigned),
    ;; then share the move/round tail with the unsigned version.
    lsl     r22
    rol     r23
    rol     r24
    XJMP    __muluha3_round
ENDF __mulha3
#endif  /* L_mulha3 */

#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
DEFUN __muluha3
    XCALL   __umulhisi3
    XJMP    __muluha3_round
ENDF __muluha3
#endif  /* L_muluha3 */

#if defined (L_muluha3_round)
;;; Common tail: move the 16-bit result into R25:R24 and round to
;;; nearest using the top discarded bit (R22.7).
DEFUN __muluha3_round
    ;; Shift result into place
    mov     r25, r24
    mov     r24, r23
    ;; Round
    sbrc    r22, 7
    adiw    r24, 1
    ret
ENDF __muluha3_round
#endif  /* L_muluha3_round */


/*******************************************************
    Fixed Multiplication  16.16 x 16.16
*******************************************************/

;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__

#if defined (__AVR_HAVE_MUL__)

;; Multiplier
#define A0  16
#define A1  A0+1
#define A2  A1+1
#define A3  A2+1

;; Multiplicand
#define B0  20
#define B1  B0+1
#define B2  B1+1
#define B3  B2+1

;; Result
#define C0  24
#define C1  C0+1
#define C2  C1+1
#define C3  C2+1

#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0:  -1.0 LSB < error <= 0   LSB
;;; Rounding, T = 1:  -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.

    clr C0
    clr C1
    mul A0, B0  $  movw C2, r0

    mul A1, B0  $  add C3, r0  $  adc C0, r1
    mul A0, B1  $  add C3, r0  $  adc C0, r1  $  rol C1

    ;; Round if T = 1.  Store guarding bits outside the result for rounding
    ;; and left-shift by the signed version (function below).
    brtc 0f
    sbrc C3, 7
    adiw C0, 1
0:  push C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.

    mul A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
    mul A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    mul A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    neg C2

    mul A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
    mul A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    neg C3

    mul A1, B3  $  add C2, r0  $  adc C3, r1
    mul A2, B2  $  add C2, r0  $  adc C3, r1
    mul A3, B1  $  add C2, r0  $  adc C3, r1

    mul A2, B3  $  add C3, r0
    mul A3, B2  $  add C3, r0

    ;; Guard bits used in the signed version below.
    pop GUARD
    clr __zero_reg__
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */

#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    clt
    XCALL   __mulusa3_round
    ;; A posteriori sign extension of the operands
    tst     B3
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  sbrs    A3, 7
    rjmp    2f
    sub     C2, B0
    sbc     C3, B1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3

#else /* __AVR_HAVE_MUL__ */

#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 22
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

;; __tmp_reg__
#define CC0 0
;; __zero_reg__
#define CC1 1
#define CC2 16
#define CC3 17

#define AA0 26
#define AA1 AA0+1
#define AA2 30
#define AA3 AA2+1

#if defined (L_mulsa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    push    B0
    push    B1
    push    B3
    clt
    XCALL   __mulusa3_round
    pop     r30
    ;; sign-extend B
    bst     r30, 7
    brtc    1f
    ;; A1, A0 survived in R27:R26
    sub     C2, AA0
    sbc     C3, AA1
1:
    pop     AA1  ;; B1
    pop     AA0  ;; B0

    ;; sign-extend A.  A3 survived in R31
    bst     AA3, 7
    brtc    2f
    sub     C2, AA0
    sbc     C3, AA1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif  /* L_mulsa3 */

#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
    push    CC2
    push    CC3
    ; clear result
    clr     __tmp_reg__
    wmov    CC2, CC0
    ; save multiplicand
    wmov    AA0, A0
    wmov    AA2, A2
    rjmp    3f

    ;; Loop the integral part

1:  ;; CC += A * 2^n;  n >= 0
    add  CC0,A0  $  adc CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3

2:  ;; A <<= 1
    lsl  A0      $  rol A1      $  rol  A2      $  rol  A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr     B3
    ror     B2
    brcs    1b
    ;; Only continue the loop while bits of B remain (sbci leaves
    ;; Z set exactly when B3 is still zero after the shift).
    sbci    B3, 0
    brne    2b

    ;; Loop the fractional part
    ;; B2/B3 is 0 now, use as guard bits for rounding
    ;; Restore multiplicand
    wmov    A0, AA0
    wmov    A2, AA2
    rjmp    5f

4:  ;; CC += A:Guard * 2^n;  n < 0
    add  B3,B2  $  adc CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
5:
    ;; A:Guard >>= 1
    lsr  A3  $  ror A2  $  ror A1  $  ror A0  $  ror B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl     B0
    rol     B1
    brcs    4b
    sbci    B0, 0
    brne    5b

    ;; Save guard bits and set carry for rounding
    push    B3
    lsl     B3
    ;; Move result into place
    wmov    C2, CC2
    wmov    C0, CC0
    clr     __zero_reg__
    brtc    6f
    ;; Round iff T = 1
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
6:
    pop     GUARD
    ;; Epilogue
    pop     CC3
    pop     CC2
    ret
ENDF __mulusa3_round
#endif  /* L_mulusa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* __AVR_HAVE_MUL__ */

#undef GUARD

/***********************************************************
    Fixed unsigned saturated Multiplication  8.8 x 8.8
***********************************************************/

#define C0  22
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define SS __tmp_reg__

#if defined (L_usmuluha3)
;; (R25:R24) = (R25:R24) *{usat} (R23:R22), saturating to 0xffff.
DEFUN __usmuluha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __umulhisi3
    tst     C3
    brne    .Lmax
    ;; Round, target is in C1..C2
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Saturate
    ldi     C2, 0xff
    ldi     C3, 0xff
    ret
ENDF  __usmuluha3
#endif /* L_usmuluha3 */

/***********************************************************
    Fixed signed saturated Multiplication  s8.7 x s8.7
***********************************************************/

#if defined (L_ssmulha3)
;; (R25:R24) = (R25:R24) *{ssat} (R23:R22), saturating to [0x8000, 0x7fff].
DEFUN __ssmulha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __mulhisi3
    ;; Adjust decimal point
    lsl     C0
    rol     C1
    rol     C2
    brvs    .LsatC3.3
    ;; The 9 MSBs must be the same
    rol     C3
    sbc     SS, SS
    cp      C3, SS
    brne    .LsatSS
    ;; Round
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Load 0x7fff
    clr     C3
.LsatC3.3:
    ;; C3 <  0 -->  0x8000
    ;; C3 >= 0 -->  0x7fff
    mov     SS, C3
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x8000
    ;; SS =  0  -->  0x7fff
    ldi     C3, 0x7f
    ldi     C2, 0xff
    sbrc    SS, 7
    adiw    C2, 1
    ret
ENDF  __ssmulha3
#endif /* L_ssmulha3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef SS

/***********************************************************
    Fixed unsigned saturated Multiplication  16.16 x 16.16
***********************************************************/

#define C0  18
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  C0+6
#define C7  C0+7
#define SS __tmp_reg__

#if defined (L_usmulusa3)
;; R22[4] = R22[4] *{usat} R18[4]
;; Ordinary ABI function
DEFUN __usmulusa3
    ;; Widening multiply
    XCALL   __umulsidi3
    ;; Overflow if any of the 16 MSBs is set
    or      C7, C6
    brne    .Lmax
    ;; Round, target is in C2..C5
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret
.Lmax:
    ;; Saturate
    ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C4, C6
    ret
ENDF  __usmulusa3
#endif /* L_usmulusa3 */

/***********************************************************
    Fixed signed saturated Multiplication  s16.15 x s16.15
***********************************************************/

#if defined (L_ssmulsa3)
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
DEFUN __ssmulsa3
    ;; Widening multiply
    XCALL   __mulsidi3
    ;; Adjust decimal point
    lsl     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    brvs    .LsatC7.7
    ;; The 17 MSBs must be the same
    rol     C6
    rol     C7
    sbc     SS, SS
    cp      C6, SS
    cpc     C7, SS
    brne    .LsatSS
    ;; Round
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret

.Lmax:
    ;; Load 0x7fffffff
    clr     C7
.LsatC7.7:
    ;; C7 <  0 -->  0x80000000
    ;; C7 >= 0 -->  0x7fffffff
    lsl     C7
    sbc     SS, SS
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x80000000
    ;; SS =  0  -->  0x7fffffff
    com     SS
    mov     C4, SS
    mov     C5, C4
    wmov    C6, C4
    subi    C7, 0x80
    ret
ENDF  __ssmulsa3
#endif /* L_ssmulsa3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#undef SS

/*******************************************************
    Fractional Division 8 / 8
*******************************************************/

#define r_divd  r25     /* dividend */
#define r_quo   r24     /* quotient */
#define r_div   r22     /* divisor */
#define r_sign  __tmp_reg__

#if defined (L_divqq3)
;; QQ __divqq3 (QQ, QQ): signed s.7 division via unsigned helper.
DEFUN __divqq3
    ;; Remember sign of the result, divide magnitudes.
    mov     r_sign, r_divd
    eor     r_sign, r_div
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
    XCALL   __divqq_helper
    lsr     r_quo
    sbrc    r_sign, 7   ; negate result if needed
    neg     r_quo
    ret
ENDF __divqq3
#endif  /* L_divqq3 */

#if defined (L_udivuqq3)
;; UQQ __udivuqq3 (UQQ, UQQ)
DEFUN __udivuqq3
    cp      r_divd, r_div
    brsh    0f
    XJMP    __divqq_helper
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
0:  ldi     r_quo, 0xff
    ret
ENDF __udivuqq3
#endif  /* L_udivuqq3 */


#if defined (L_divqq_helper)
;; Unsigned restoring-style 8-bit fractional division core.
;; Requires r_divd < r_div on entry; __zero_reg__ doubles as the
;; loop counter (one set bit shifted left 8 times) and is 0 again
;; on exit.
DEFUN __divqq_helper
    clr     r_quo           ; clear quotient
    inc     __zero_reg__    ; init loop counter, used per shift
__udivuqq3_loop:
    lsl     r_divd          ; shift dividend
    brcs    0f              ; dividend overflow
    cp      r_divd,r_div    ; compare dividend & divisor
    brcc    0f              ; dividend >= divisor
    rol     r_quo           ; shift quotient (with CARRY)
    rjmp    __udivuqq3_cont
0:
    sub     r_divd,r_div    ; restore dividend
    lsl     r_quo           ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl     __zero_reg__    ; shift loop-counter bit
    brne    __udivuqq3_loop
    com     r_quo           ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif  /* L_divqq_helper */

#undef  r_divd
#undef  r_quo
#undef  r_div
#undef  r_sign


/*******************************************************
    Fractional Division 16 / 16
*******************************************************/

#define r_divdL 26     /* dividend Low */
#define r_divdH 27     /* dividend Hig */
#define r_quoL  24     /* quotient Low */
#define r_quoH  25     /* quotient High */
#define r_divL  22     /* divisor */
#define r_divH  23     /* divisor */
#define r_cnt   21

#if defined (L_divhq3)
;; HQ __divhq3 (HQ, HQ): signed s.15 division.
DEFUN __divhq3
    ;; r0 = sign of the result
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    cp      r_divdL, r_divL
    cpc     r_divdH, r_divH
    breq    __divhq3_minus1  ; if equal return -1
    XCALL   __udivuhq3
    lsr     r_quoH
    ror     r_quoL
    brpl    9f
    ;; negate result if needed
    NEG2    r_quoL
9:
    ret
__divhq3_minus1:
    ldi     r_quoH, 0x80
    clr     r_quoL
    ret
ENDF __divhq3
#endif  /* defined (L_divhq3) */

#if defined (L_udivuhq3)
;; UHQ __udivuhq3 (UHQ, UHQ)
DEFUN __udivuhq3
    sub     r_quoH,r_quoH   ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

;; Shared restoring division loop; entered with carry and r_quoH
;; preset by __udivuhq3 or __udivuha3.
DEFUN __udivuha3_common
    clr     r_quoL          ; clear quotient
    ldi     r_cnt,16        ; init loop counter
__udivuhq3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    brcs    __udivuhq3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    brcc    __udivuhq3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivuhq3_cont
__udivuhq3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol     r_quoH          ; shift quotient
    dec     r_cnt           ; decrement loop counter
    brne    __udivuhq3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif  /* defined (L_udivuhq3) */

/*******************************************************
    Fixed Division 8.8 / 8.8
*******************************************************/
#if defined (L_divha3)
;; HA __divha3 (HA, HA): signed s8.7 division.
DEFUN __divha3
    ;; r0 = sign of the result
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    XCALL   __udivuha3
    lsr     r_quoH      ; adjust to 7 fractional bits
    ror     r_quoL
    sbrs    r0, 7       ; negate result if needed
    ret
    NEG2    r_quoL
    ret
ENDF __divha3
#endif  /* defined (L_divha3) */

#if defined (L_udivuha3)
;; UHA __udivuha3 (UHA, UHA): rearrange into fractional form, then
;; reuse the 16-bit fractional division loop.
DEFUN __udivuha3
    mov     r_quoH, r_divdL
    mov     r_divdL, r_divdH
    clr     r_divdH
    lsl     r_quoH          ; shift quotient into carry
    XJMP    __udivuha3_common ; same as fractional after rearrange
ENDF __udivuha3
#endif  /* defined (L_udivuha3) */

#undef  r_divdL
#undef  r_divdH
#undef  r_quoL
#undef  r_quoH
#undef  r_divL
#undef  r_divH
#undef  r_cnt

/*******************************************************
    Fixed Division 16.16 / 16.16
*******************************************************/

#define r_arg1L  24    /* arg1 gets passed already in place */
#define r_arg1H  25
#define r_arg1HL 26
#define r_arg1HH 27
#define r_divdL  26    /* dividend Low */
#define r_divdH  27
#define r_divdHL 30
#define r_divdHH 31    /* dividend High */
#define r_quoL   22    /* quotient Low */
#define r_quoH   23
#define r_quoHL  24
#define r_quoHH  25    /* quotient High */
#define r_divL   18    /* divisor Low */
#define r_divH   19
#define r_divHL  20
#define r_divHH  21    /* divisor High */
#define r_cnt  __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_divsa3)
;; SA __divsa3 (SA, SA): signed s16.15 division.
DEFUN __divsa3
    ;; r0 = sign of the result
    mov     r0, r_arg1HH
    eor     r0, r_divHH
    sbrs    r_divHH, 7
    rjmp    1f
    NEG4    r_divL
1:
    sbrs    r_arg1HH, 7
    rjmp    2f
    NEG4    r_arg1L
2:
    XCALL   __udivusa3
    lsr     r_quoHH     ; adjust to 15 fractional bits
    ror     r_quoHL
    ror     r_quoH
    ror     r_quoL
    sbrs    r0, 7       ; negate result if needed
    ret
    ;; negate r_quoL
    XJMP    __negsi2
ENDF __divsa3
#endif  /* defined (L_divsa3) */

#if defined (L_udivusa3)
;; USA __udivusa3 (USA, USA): 32-bit restoring fractional division.
DEFUN __udivusa3
    ldi     r_divdHL, 32    ; init loop counter
    mov     r_cnt, r_divdHL
    clr     r_divdHL
    clr     r_divdHH
    wmov    r_quoL, r_divdHL
    lsl     r_quoHL         ; shift quotient into carry
    rol     r_quoHH
__udivusa3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    rol     r_divdHL
    rol     r_divdHH
    brcs    __udivusa3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    cpc     r_divdHL,r_divHL
    cpc     r_divdHH,r_divHH
    brcc    __udivusa3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivusa3_cont
__udivusa3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    sbc     r_divdHL,r_divHL
    sbc     r_divdHH,r_divHH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivusa3_cont:
    rol     r_quoH          ; shift quotient
    rol     r_quoHL
    rol     r_quoHH
    dec     r_cnt           ; decrement loop counter
    brne    __udivusa3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    com     r_quoHL
    com     r_quoHH
    ret
ENDF __udivusa3
#endif  /* defined (L_udivusa3) */

#undef  r_arg1L
#undef  r_arg1H
#undef  r_arg1HL
#undef  r_arg1HH
#undef  r_divdL
#undef  r_divdH
#undef  r_divdHL
#undef  r_divdHH
#undef  r_quoL
#undef  r_quoH
#undef  r_quoHL
#undef  r_quoHH
#undef  r_divL
#undef  r_divH
#undef  r_divHL
#undef  r_divHH
#undef  r_cnt


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24

#if defined (L_ssabs_1)
;; Signed saturated 1-byte absolute value:
;; clamps the asymmetric case neg(0x80) back to 0x7f.
DEFUN __ssabs_1
    sbrs    A0, 7
    ret
    neg     A0
    sbrc    A0,7
    dec     A0
    ret
ENDF __ssabs_1
#endif /* L_ssabs_1 */

#undef A0


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24
#define A1  A0+1

#if defined (L_ssneg_2)
;; Signed saturated 2-byte negation: V is set only for neg(0x8000),
;; which is then pulled back to 0x7fff.
DEFUN __ssneg_2
    NEG2    A0
    brvc    0f
    sbiw    A0, 1
0:  ret
ENDF __ssneg_2
#endif /* L_ssneg_2 */

#if defined (L_ssabs_2)
;; Signed saturated 2-byte absolute value.
DEFUN __ssabs_2
    sbrs    A1, 7
    ret
    XJMP    __ssneg_2
ENDF __ssabs_2
#endif /* L_ssabs_2 */

#undef A0
#undef A1


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  22
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3

#if defined (L_ssneg_4)
;; Signed saturated 4-byte negation: neg(0x80000000) saturates
;; to 0x7fffffff.
DEFUN __ssneg_4
    XCALL   __negsi2
    brvc    0f
    ldi     A3, 0x7f
    ldi     A2, 0xff
    ldi     A1, 0xff
    ldi     A0, 0xff
0:  ret
ENDF __ssneg_4
#endif /* L_ssneg_4 */

#if defined (L_ssabs_4)
;; Signed saturated 4-byte absolute value.
DEFUN __ssabs_4
    sbrs    A3, 7
    ret
    XJMP    __ssneg_4
ENDF __ssabs_4
#endif /* L_ssabs_4 */

#undef A0
#undef A1
#undef A2
#undef A3


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

#if defined (L_clr_8)
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; Clear Carry and all Bytes
DEFUN __clr_8
    ;; Clear Carry and set Z
    sub     A7, A7
    ;; FALLTHRU
ENDF  __clr_8
;; Propagate Carry to all Bytes, Carry unaltered
DEFUN __sbc_8
    sbc     A7, A7
    sbc     A6, A6
    wmov    A4, A6
    wmov    A2, A6
    wmov    A0, A6
    ret
ENDF __sbc_8
#endif /* L_clr_8 */

#if defined (L_ssneg_8)
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

;; Signed saturated 8-byte negation: neg(INT64_MIN) saturates
;; to INT64_MAX.
DEFUN __ssneg_8
    XCALL   __negdi2
    brvc    0f
    ;; A[] = 0x7fffffff
    sec
    XCALL   __sbc_8
    ldi     A7, 0x7f
0:  ret
ENDF __ssneg_8
#endif /* L_ssneg_8 */

#if defined (L_ssabs_8)
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

;; Signed saturated 8-byte absolute value.
DEFUN __ssabs_8
    sbrs    A7, 7
    ret
    XJMP    __ssneg_8
ENDF __ssabs_8
#endif /* L_ssabs_8 */

;; Second Argument
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#if defined (L_usadd_8)
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

;; Unsigned saturating 64-bit add:  A[] = A[] + B[], clamped to UINT64_MAX.
DEFUN __usadd_8
    XCALL __adddi3
    brcs 0f             ; carry out = unsigned overflow
    ret
0:  ;; A[] = 0xffffffffffffffff
    XJMP __sbc_8        ; carry is still set, so all bytes become 0xff
ENDF __usadd_8
#endif /* L_usadd_8 */
1351
#if defined (L_ussub_8)
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

;; Unsigned saturating 64-bit subtract:  A[] = A[] - B[], clamped to 0.
DEFUN __ussub_8
    XCALL __subdi3
    brcs 0f             ; borrow = unsigned underflow
    ret
0:  ;; A[] = 0
    XJMP __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */
1365
#if defined (L_ssadd_8)
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

;; Signed saturating 64-bit add:  A[] = A[] + B[].
;; On signed overflow (V set) the result takes the sign of B:
;; the overflowing sum of same-signed operands saturates toward B's sign.
DEFUN __ssadd_8
    XCALL __adddi3
    brvc 0f
    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    cpi B7, 0x80        ; C = (B7 < 0x80) = (B >= 0)
    XCALL __sbc_8       ; A[] = C ? 0xff..ff : 0x00..00
    subi A7, 0x80       ; 0xff -> 0x7f (MAX) resp. 0x00 -> 0x80 (MIN)
0:  ret
ENDF __ssadd_8
#endif /* L_ssadd_8 */
1381
#if defined (L_sssub_8)
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

;; Signed saturating 64-bit subtract:  A[] = A[] - B[].
;; On signed overflow (V set) the result takes the sign opposite to B.
DEFUN __sssub_8
    XCALL __subdi3
    brvc 0f
    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
    ldi A7, 0x7f        ; A7 is rewritten below anyway; use it as scratch
    cp A7, B7           ; C = (0x7f < B7) = (B < 0)
    XCALL __sbc_8       ; A[] = C ? 0xff..ff : 0x00..00
    subi A7, 0x80       ; 0xff -> 0x7f (MAX) resp. 0x00 -> 0x80 (MIN)
0:  ret
ENDF __sssub_8
#endif /* L_sssub_8 */
1398
1399 #undef A0
1400 #undef A1
1401 #undef A2
1402 #undef A3
1403 #undef A4
1404 #undef A5
1405 #undef A6
1406 #undef A7
1407 #undef B0
1408 #undef B1
1409 #undef B2
1410 #undef B3
1411 #undef B4
1412 #undef B5
1413 #undef B6
1414 #undef B7
1415
1416 \f
1417 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1418 ;; Rounding Helpers
1419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1420
#ifdef L_mask1

#define AA 24
#define CC 25

;; R25 = 1 << (R24 & 7)
;; CC = 1 << (AA & 7)
;; Clobbers: None
;; Builds the power of two bit-by-bit from AA's low three bits
;; instead of looping over a shift count.
DEFUN __mask1
    ;; CC = 2 ^ AA.1  (start from 4 if bit 1 set, else from 1)
    ldi CC, 1 << 2
    sbrs AA, 1
    ldi CC, 1 << 0
    ;; CC *= 2 ^ AA.0  (one extra doubling if bit 0 set)
    sbrc AA, 0
    lsl CC
    ;; CC *= 2 ^ AA.2  (SWAP = shift by 4 for the values 1,2,4,8)
    sbrc AA, 2
    swap CC
    ret
ENDF __mask1

#undef AA
#undef CC
#endif /* L_mask1 */
1446
1447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1448
1449 ;; The rounding point. Any bits smaller than
1450 ;; 2^{-RP} will be cleared.
1451 #define RP R24
1452
1453 #define A0 22
1454 #define A1 A0 + 1
1455
1456 #define C0 24
1457 #define C1 C0 + 1
1458
1459 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1460 ;; Rounding, 1 Byte
1461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1462
#ifdef L_roundqq3

;; Round a signed QQ value to RP fractional bits (round half away
;; from zero by adding 2^{-RP-1}, saturating, then masking).
;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
DEFUN __roundqq3
    mov __tmp_reg__, C1         ; R25 is not in the clobber list: save it
    subi RP, __QQ_FBIT__ - 1
    neg RP                      ; RP = QQ_FBIT-1 - RP
    ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
    XCALL __mask1
    mov C0, C1                  ; C0 = rounding offset 2^{-RP-1}
    ;; Add-Saturate 2^{-RP-1}
    add A0, C0
    brvc 0f
    ldi C0, 0x7f                ; signed overflow: saturate to QQ max
    rjmp 9f
0:  ;; Mask out bits beyond RP
    lsl C0                      ; C0 = 2^{-RP}
    neg C0                      ; mask: ones at 2^{-RP} and above
    and C0, A0
9:  mov C1, __tmp_reg__         ; restore R25
    ret
ENDF __roundqq3
#endif /* L_roundqq3 */
1487
#ifdef L_rounduqq3

;; Round an unsigned UQQ value to RP fractional bits (add 2^{-RP-1},
;; saturate on carry, then mask).  Mirrors __roundqq3 but tests C, not V.
;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
DEFUN __rounduqq3
    mov __tmp_reg__, C1         ; R25 is not in the clobber list: save it
    subi RP, __UQQ_FBIT__ - 1
    neg RP                      ; RP = UQQ_FBIT-1 - RP
    ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
    XCALL __mask1
    mov C0, C1                  ; C0 = rounding offset 2^{-RP-1}
    ;; Add-Saturate 2^{-RP-1}
    add A0, C0
    brcc 0f
    ldi C0, 0xff                ; unsigned overflow: saturate to UQQ max
    rjmp 9f
0:  ;; Mask out bits beyond RP
    lsl C0                      ; C0 = 2^{-RP}
    neg C0                      ; mask: ones at 2^{-RP} and above
    and C0, A0
9:  mov C1, __tmp_reg__         ; restore R25
    ret
ENDF __rounduqq3
#endif /* L_rounduqq3 */
1512
1513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1514 ;; Rounding, 2 Bytes
1515 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1516
#ifdef L_addmask_2

;; [ R25:R24 = 1 << (R24 & 15)
;;   R23:R22 += 1 << (R24 & 15) ]
;; SREG is set according to the addition
;; Note: C0 aliases RP (both are R24); RP is consumed here.
DEFUN __addmask_2
    ;; R25 = 1 << (R24 & 7)
    XCALL __mask1
    cpi RP, 1 << 3      ; C = (RP < 8)
    sbc C0, C0          ; C0 = (RP < 8) ? 0xff : 0x00
    ;; Move the bit from C1 down into C0 if RP.3 is not set
    ;; (the power of two belongs in the low byte then)
    and C0, C1
    eor C1, C0
    ;; Finally, add the power-of-two: A[] += C[]
    add A0, C0
    adc A1, C1
    ret
ENDF __addmask_2
#endif /* L_addmask_2 */
1536
#ifdef L_round_s2

;; Signed 2-byte rounding (HQ / HA).
;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
;; __roundhq3 rebases RP from HQ's fbit to HA's, then falls through.
DEFUN __roundhq3
    subi RP, __HQ_FBIT__ - __HA_FBIT__
ENDF __roundhq3
DEFUN __roundha3
    subi RP, __HA_FBIT__ - 1
    neg RP              ; RP = HA_FBIT-1 - RP
    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_2
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_s2_const
ENDF __roundha3

#endif /* L_round_s2 */
1554
#ifdef L_round_u2

;; Unsigned 2-byte rounding (UHQ / UHA).
;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
;; __rounduhq3 rebases RP from UHQ's fbit to UHA's, then falls through.
DEFUN __rounduhq3
    subi RP, __UHQ_FBIT__ - __UHA_FBIT__
ENDF __rounduhq3
DEFUN __rounduha3
    subi RP, __UHA_FBIT__ - 1
    neg RP              ; RP = UHA_FBIT-1 - RP
    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_2
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_u2_const
ENDF __rounduha3

#endif /* L_round_u2 */
1572
1573
#ifdef L_round_2_const

;; Helpers for 2 byte wide rounding.  Entered right after __addmask_2
;; with C[] = 2^{-RP-1} and SREG still holding the addition's flags.

;; Signed flavor: saturate to 0x7fff on signed overflow (V set).
DEFUN __round_s2_const
    brvc 2f
    ldi C1, 0x7f
    rjmp 1f
    ;; FALLTHRU (Barrier)
ENDF __round_s2_const

;; Unsigned flavor: saturate to 0xffff on carry out.
DEFUN __round_u2_const
    brcc 2f
    ldi C1, 0xff
1:
    ldi C0, 0xff
    rjmp 9f
2:
    ;; No overflow: no saturation needed, just mask off the bits
    ;; below the rounding point.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl C0
    rol C1
    ;; C[] = -C[]: 16-bit mask with ones at 2^{-RP} and above
    NEG2 C0
    ;; Clear the bits beyond the rounding point.
    and C0, A0
    and C1, A1
9:  ret
ENDF __round_u2_const

#endif /* L_round_2_const */
1606
1607 #undef A0
1608 #undef A1
1609 #undef C0
1610 #undef C1
1611
1612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1613 ;; Rounding, 4 Bytes
1614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1615
1616 #define A0 18
1617 #define A1 A0 + 1
1618 #define A2 A0 + 2
1619 #define A3 A0 + 3
1620
1621 #define C0 22
1622 #define C1 C0 + 1
1623 #define C2 C0 + 2
1624 #define C3 C0 + 3
1625
#ifdef L_addmask_4

;; [ R25:R22 = 1 << (R24 & 31)
;;   R21:R18 += 1 << (R24 & 31) ]
;; SREG is set according to the addition
;; Note: RP (R24) is consumed by the CPI/SBC conditioning below.
DEFUN __addmask_4
    ;; R25 = 1 << (R24 & 7)
    XCALL __mask1
    cpi RP, 1 << 4      ; C = (RP < 16)
    sbc C0, C0          ; C1:C0 = (RP < 16) ? 0xffff : 0
    sbc C1, C1
    ;; Swap C2 with C3 if RP.3 is not set
    cpi RP, 1 << 3      ; C = (RP < 8)
    sbc C2, C2          ; C2 = (RP < 8) ? 0xff : 0
    and C2, C3
    eor C3, C2
    ;; Swap C3:C2 with C1:C0 if RP.4 is not set
    and C0, C2 $ eor C2, C0
    and C1, C3 $ eor C3, C1
    ;; Finally, add the power-of-two: A[] += C[]
    add A0, C0
    adc A1, C1
    adc A2, C2
    adc A3, C3
    ret
ENDF __addmask_4
#endif /* L_addmask_4 */
1653
#ifdef L_round_s4

;; Signed 4-byte rounding (SQ / SA).
;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
;; __roundsq3 rebases RP from SQ's fbit to SA's, then falls through.
DEFUN __roundsq3
    subi RP, __SQ_FBIT__ - __SA_FBIT__
ENDF __roundsq3
DEFUN __roundsa3
    subi RP, __SA_FBIT__ - 1
    neg RP              ; RP = SA_FBIT-1 - RP
    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_4
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_s4_const
ENDF __roundsa3

#endif /* L_round_s4 */
1671
#ifdef L_round_u4

;; Unsigned 4-byte rounding (USQ / USA).
;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
;; __roundusq3 rebases RP from USQ's fbit to USA's, then falls through.
DEFUN __roundusq3
    subi RP, __USQ_FBIT__ - __USA_FBIT__
ENDF __roundusq3
DEFUN __roundusa3
    subi RP, __USA_FBIT__ - 1
    neg RP              ; RP = USA_FBIT-1 - RP
    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL __addmask_4
    ;; Saturate / mask according to the flags of the addition above
    XJMP __round_u4_const
ENDF __roundusa3

#endif /* L_round_u4 */
1689
1690
#ifdef L_round_4_const

;; Helpers for 4 byte wide rounding.  Entered right after __addmask_4
;; with C[] = 2^{-RP-1} and SREG still holding the addition's flags.

;; Signed flavor: saturate to 0x7fffffff on signed overflow (V set).
DEFUN __round_s4_const
    brvc 2f
    ldi C3, 0x7f
    rjmp 1f
    ;; FALLTHRU (Barrier)
ENDF __round_s4_const

;; Unsigned flavor: saturate to 0xffffffff on carry out.
DEFUN __round_u4_const
    brcc 2f
    ldi C3, 0xff
1:
    ldi C2, 0xff
    ldi C1, 0xff
    ldi C0, 0xff
    rjmp 9f
2:
    ;; No overflow: no saturation needed, just mask off the bits
    ;; below the rounding point.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl C0
    rol C1
    rol C2
    rol C3
    ;; C[] = -C[]: 32-bit mask with ones at 2^{-RP} and above
    XCALL __negsi2
    ;; Clear the bits beyond the rounding point.
    and C0, A0
    and C1, A1
    and C2, A2
    and C3, A3
9:  ret
ENDF __round_u4_const

#endif /* L_round_4_const */
1728
1729 #undef A0
1730 #undef A1
1731 #undef A2
1732 #undef A3
1733 #undef C0
1734 #undef C1
1735 #undef C2
1736 #undef C3
1737
1738 #undef RP
1739
1740 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1741 ;; Rounding, 8 Bytes
1742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1743
1744 #define RP 16
1745 #define FBITm1 31
1746
1747 #define C0 18
1748 #define C1 C0 + 1
1749 #define C2 C0 + 2
1750 #define C3 C0 + 3
1751 #define C4 C0 + 4
1752 #define C5 C0 + 5
1753 #define C6 C0 + 6
1754 #define C7 C0 + 7
1755
1756 #define A0 16
1757 #define A1 17
1758 #define A2 26
1759 #define A3 27
1760 #define A4 28
1761 #define A5 29
1762 #define A6 30
1763 #define A7 31
1764
1765
#ifdef L_rounddq3
;; Round a signed DQ value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T cleared selects the signed saturation path in __round_x8.
DEFUN __rounddq3
    ldi FBITm1, __DQ_FBIT__ - 1
    clt
    XJMP __round_x8
ENDF __rounddq3
#endif /* L_rounddq3 */
1775
#ifdef L_roundudq3
;; Round an unsigned UDQ value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T set selects the unsigned saturation path in __round_x8.
DEFUN __roundudq3
    ldi FBITm1, __UDQ_FBIT__ - 1
    set
    XJMP __round_x8
ENDF __roundudq3
#endif /* L_roundudq3 */
1785
#ifdef L_roundda3
;; Round a signed DA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T cleared selects the signed saturation path in __round_x8.
DEFUN __roundda3
    ldi FBITm1, __DA_FBIT__ - 1
    clt
    XJMP __round_x8
ENDF __roundda3
#endif /* L_roundda3 */
1795
#ifdef L_rounduda3
;; Round an unsigned UDA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T set selects the unsigned saturation path in __round_x8.
DEFUN __rounduda3
    ldi FBITm1, __UDA_FBIT__ - 1
    set
    XJMP __round_x8
ENDF __rounduda3
#endif /* L_rounduda3 */
1805
#ifdef L_roundta3
;; Round a signed TA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T cleared selects the signed saturation path in __round_x8.
DEFUN __roundta3
    ldi FBITm1, __TA_FBIT__ - 1
    clt
    XJMP __round_x8
ENDF __roundta3
#endif /* L_roundta3 */
1815
#ifdef L_rounduta3
;; Round an unsigned UTA value to R16 fractional bits.
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; T set selects the unsigned saturation path in __round_x8.
DEFUN __rounduta3
    ldi FBITm1, __UTA_FBIT__ - 1
    set
    XJMP __round_x8
ENDF __rounduta3
#endif /* L_rounduta3 */
1825
1826
#ifdef L_round_x8
;; Common worker for all 8-byte rounding routines.
;; In:  C[] = R25:R18 = 64-bit fixed-point value
;;      RP (R16) = rounding point,  FBITm1 (R31) = FBIT - 1
;;      T flag: 0 = signed saturation, 1 = unsigned saturation
;; Out: C[] = R25:R18 = rounded value
;; Register aliasing: A0/A1 = R16/R17, A4/A5 = R28/R29 (callee-saved,
;; hence the pushes); A2/A3 = X, A6/A7 = Z.  A0 aliases RP.
DEFUN __round_x8
    push r16
    push r17
    push r28
    push r29
    ;; Compute log2 of addend from rounding point
    sub RP, FBITm1
    neg RP              ; RP = FBITm1 - RP = shift count for __ashldi3
    ;; Move input to work register A[].  A0 is R16 = RP, which must
    ;; keep the shift count for __ashldi3, so park C0 on the stack.
    push C0
    mov A1, C1
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6
    ;; C[] = 1 << (FBIT-1 - RP)
    XCALL __clr_8       ; C[] = 0
    inc C0              ; C[] = 1
    XCALL __ashldi3     ; C[] <<= R16
    pop A0              ; shift count consumed; restore input's low byte
    ;; A[] += C[]  (add the rounding offset 2^{FBIT-1 - RP})
    add A0, C0
    adc A1, C1
    adc A2, C2
    adc A3, C3
    adc A4, C4
    adc A5, C5
    adc A6, C6
    adc A7, C7
    brts 1f
    ;; Signed
    brvc 3f             ; no signed overflow -> mask step
    ;; Signed overflow: C[] = 0x7f...
    brvs 2f             ; V known set: always taken (skips unsigned test)
1:  ;; Unsigned
    brcc 3f             ; no carry out -> mask step
    ;; Unsigned overflow: C[] = 0xff...
2:  ldi C7, 0xff
    ldi C6, 0xff
    wmov C0, C6
    wmov C2, C6
    wmov C4, C6
    bld C7, 7           ; T=0 (signed): clear sign bit -> 0x7f...
    rjmp 9f
3:
    ;; C[] = -C[] - C[] = -2^{FBIT-RP}: mask of the bits to keep
    push A0             ; R16 is needed as shift count once more
    ldi r16, 1
    XCALL __ashldi3     ; C[] <<= 1
    pop A0
    XCALL __negdi2      ; C[] = -C[]
    ;; Clear the bits beyond the rounding point.
    and C0, A0
    and C1, A1
    and C2, A2
    and C3, A3
    and C4, A4
    and C5, A5
    and C6, A6
    and C7, A7
9:  ;; Epilogue
    pop r29
    pop r28
    pop r17
    pop r16
    ret
ENDF __round_x8

#endif /* L_round_x8 */
1896
1897 #undef A0
1898 #undef A1
1899 #undef A2
1900 #undef A3
1901 #undef A4
1902 #undef A5
1903 #undef A6
1904 #undef A7
1905
1906 #undef C0
1907 #undef C1
1908 #undef C2
1909 #undef C3
1910 #undef C4
1911 #undef C5
1912 #undef C6
1913 #undef C7
1914
1915 #undef RP
1916 #undef FBITm1
1917
1918
1919 ;; Supply implementations / symbols for the bit-banging functions
1920 ;; __builtin_avr_bitsfx and __builtin_avr_fxbits
#ifdef L_ret
;; Trivial no-op entry point: a plain RET that the bit-banging
;; builtins can reference as a symbol.
DEFUN __ret
    ret
ENDF __ret
#endif /* L_ret */
1926
1927 #endif /* if not __AVR_TINY__ */