/* -*- Mode: Asm -*- */
;; Copyright (C) 2012
;; Free Software Foundation, Inc.
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
;;                Georg-Johann Lay (avr@gjlay.de)

;; This file is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.

;; In addition to the permissions in the GNU General Public License, the
;; Free Software Foundation gives you unlimited permission to link the
;; compiled version of this file into combinations with other programs,
;; and to distribute those combinations without any restriction coming
;; from the use of this file.  (The General Public License restrictions
;; do apply in other respects; for example, they cover modification of
;; the file, and distribution when not linked into a combine
;; executable.)

;; This file is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

.section .text.libgcc.fixed, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
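;;
;; Quick reference for the fixed-point layouts used below (avr-gcc's
;; rendering of ISO/IEC TR 18037):  QQ = s.7, UQQ = .8, HQ = s.15,
;; UHQ = .16, HA = s8.7, UHA = 8.8, SA = s16.15, USA = 16.16.
;; SF is an IEEE-754 single in r25:r22.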

#if defined (L_fractqqsf)
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Sign-extend
    lsl     r24
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fractqqsf
#endif  /* L_fractqqsf */

#if defined (L_fractuqqsf)
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuqqsf
#endif  /* L_fractuqqsf */

#if defined (L_fracthqsf)
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov    22, 24
    ;; Sign-extend
    lsl     r25
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fracthqsf
#endif  /* L_fracthqsf */

#if defined (L_fractuhqsf)
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov    22, 24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhqsf
#endif  /* L_fractuhqsf */

#if defined (L_fracthasf)
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Sign-extend
    lsl     r25
    sbc     r25, r25
    XJMP    __fractsasf
ENDF __fracthasf
#endif  /* L_fracthasf */

#if defined (L_fractuhasf)
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Zero-extend
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhasf
#endif  /* L_fractuhasf */

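;; The exp_lo()/exp_hi() immediates used below are the two bytes of
;; N << 23, i.e. N positioned at the exponent field of an IEEE-754
;; single (bit 7 of r24 and bits 0..6 of r25).  Subtracting them from
;; a non-zero float scales it by 2^-N without a multiplication;
;; subtracting the negated values multiplies by 2^N instead.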
#if defined (L_fractsqsf)
DEFUN __fractsqsf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; binary point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (31)
    sbci    r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif  /* L_fractsqsf */

#if defined (L_fractusqsf)
DEFUN __fractusqsf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; binary point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif  /* L_fractusqsf */

#if defined (L_fractsasf)
DEFUN __fractsasf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; binary point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (15)
    sbci    r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif  /* L_fractsasf */

#if defined (L_fractusasf)
DEFUN __fractusasf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; binary point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif  /* L_fractusasf */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
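;; These routines pre-scale the input by 2^N:  subtracting the negated
;; exp_lo()/exp_hi() immediates adds N to the exponent, and __fixsfsi
;; or __fixunssfsi then delivers the fixed-point bit pattern as an
;; ordinary integer.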
#if defined (L_fractsfqq)
DEFUN __fractsfqq
    ;; Multiply by 2^{24+7} to get a QQ result in r25
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XCALL   __fixsfsi
    mov     r24, r25
    ret
ENDF __fractsfqq
#endif  /* L_fractsfqq */

#if defined (L_fractsfuqq)
DEFUN __fractsfuqq
    ;; Multiply by 2^{24+8} to get a UQQ result in r25
    subi    r25, exp_hi (-32)
    XCALL   __fixunssfsi
    mov     r24, r25
    ret
ENDF __fractsfuqq
#endif  /* L_fractsfuqq */

#if defined (L_fractsfha)
DEFUN __fractsfha
    ;; Multiply by 2^{16+7} to get a HA result in r25:r24
    subi    r24, exp_lo (-23)
    sbci    r25, exp_hi (-23)
    XJMP    __fixsfsi
ENDF __fractsfha
#endif  /* L_fractsfha */

#if defined (L_fractsfuha)
DEFUN __fractsfuha
    ;; Multiply by 2^24 to get a UHA result in r25:r24
    subi    r25, exp_hi (-24)
    XJMP    __fixunssfsi
ENDF __fractsfuha
#endif  /* L_fractsfuha */

#if defined (L_fractsfhq)
FALIAS __fractsfsq

DEFUN __fractsfhq
    ;; Multiply by 2^{16+15} to get a HQ result in r25:r24,
    ;; or by 2^31 to get a SQ result in r25:r22
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XJMP    __fixsfsi
ENDF __fractsfhq
#endif  /* L_fractsfhq */

#if defined (L_fractsfuhq)
FALIAS __fractsfusq

DEFUN __fractsfuhq
    ;; Multiply by 2^{16+16} to get a UHQ result in r25:r24,
    ;; or by 2^32 to get a USQ result in r25:r22
    subi    r25, exp_hi (-32)
    XJMP    __fixunssfsi
ENDF __fractsfuhq
#endif  /* L_fractsfuhq */

#if defined (L_fractsfsa)
DEFUN __fractsfsa
    ;; Multiply by 2^15 to get a SA result in r25:r22
    subi    r24, exp_lo (-15)
    sbci    r25, exp_hi (-15)
    XJMP    __fixsfsi
ENDF __fractsfsa
#endif  /* L_fractsfsa */

#if defined (L_fractsfusa)
DEFUN __fractsfusa
    ;; Multiply by 2^16 to get a USA result in r25:r22
    subi    r25, exp_hi (-16)
    XJMP    __fixunssfsi
ENDF __fractsfusa
#endif  /* L_fractsfusa */


;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.

/*******************************************************
    Fractional Multiplication  8 x 8  without MUL
*******************************************************/

#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding:  ???
DEFUN __mulqq3
    XCALL   __fmuls
    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
    ;; The only input that can produce  -1  is  (-1)^2.
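    ;; Detect that case by decrementing:  DEC sets V only for input
    ;; 0x80 (-1), in which case the INC is skipped and 0x7f, the
    ;; largest QQ value, is returned.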
    dec     r23
    brvs    0f
    inc     r23
0:  ret
ENDF __mulqq3
#endif /* L_mulqq3 && ! HAVE_MUL */

/*******************************************************
    Fractional Multiply  .16 x .16  with and without MUL
*******************************************************/

#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulhq3
    XCALL   __mulhisi3
    ;; Shift result into place
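    ;; The 32-bit product of the two s.15 inputs has 30 fractional
    ;; bits; one left shift aligns it to 31, so that r25:r24 is the
    ;; s.15 result and bit 7 of r23 becomes the rounding bit.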
    lsl     r23
    rol     r24
    rol     r25
    brvs    1f
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
    ldi     r24, lo8 (0x7fff)
    ldi     r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif /* defined (L_mulhq3) */

#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB < error <= 0.5 LSB
DEFUN __muluhq3
    XCALL   __umulhisi3
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
ENDF __muluhq3
#endif  /* L_muluhq3 */


/*******************************************************
    Fixed Multiply  8.8 x 8.8  with and without MUL
*******************************************************/

#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
    XCALL   __mulhisi3
    lsl     r22
    rol     r23
    rol     r24
    XJMP    __muluha3_round
ENDF __mulha3
#endif  /* L_mulha3 */

#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB < error <= 0.5 LSB
DEFUN __muluha3
    XCALL   __umulhisi3
    XJMP    __muluha3_round
ENDF __muluha3
#endif  /* L_muluha3 */

#if defined (L_muluha3_round)
DEFUN __muluha3_round
    ;; Shift result into place
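    ;; Discard the low byte of the 32-bit product in r25:r22:  the
    ;; result is the middle word r24:r23, moved up to r25:r24, and
    ;; bit 7 of the dropped byte (r22) is the rounding bit.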
    mov     r25, r24
    mov     r24, r23
    ;; Round
    sbrc    r22, 7
    adiw    r24, 1
    ret
ENDF __muluha3_round
#endif  /* L_muluha3_round */


/*******************************************************
    Fixed Multiplication  16.16 x 16.16
*******************************************************/

;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__

#if defined (__AVR_HAVE_MUL__)

;; Multiplier
#define A0  16
#define A1  A0+1
#define A2  A1+1
#define A3  A2+1

;; Multiplicand
#define B0  20
#define B1  B0+1
#define B2  B1+1
#define B3  B2+1

;; Result
#define C0  24
#define C1  C0+1
#define C2  C1+1
#define C3  C2+1

#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0:  -1.0 LSB < error <= 0 LSB
;;; Rounding, T = 1:  -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.
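    ;; (The `$' character acts as a statement separator for AVR gas,
    ;; so each line below packs one multiply with its accumulation.)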

    clr  C0
    clr  C1
    mul  A0, B0  $  movw C2, r0

    mul  A1, B0  $  add C3, r0  $  adc C0, r1
    mul  A0, B1  $  add C3, r0  $  adc C0, r1  $  rol C1

    ;; Round if T = 1.  Store the guard bits outside the result so
    ;; that the signed version below can use them for rounding and
    ;; for its left shift.
    brtc 0f
    sbrc C3, 7
    adiw C0, 1
0:  push C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.

    mul  A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
    mul  A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    mul  A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    neg  C2

    mul  A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
    mul  A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul  A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul  A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    neg  C3

    mul  A1, B3  $  add C2, r0  $  adc C3, r1
    mul  A2, B2  $  add C2, r0  $  adc C3, r1
    mul  A3, B1  $  add C2, r0  $  adc C3, r1

    mul  A2, B3  $  add C3, r0
    mul  A3, B2  $  add C3, r0

    ;; Guard bits used in the signed version below.
    pop  GUARD
    clr  __zero_reg__
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */

#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    clt
    XCALL   __mulusa3_round
    ;; A posteriori sign extension of the operands
    tst  B3
    brpl 1f
    sub  C2, A0
    sbc  C3, A1
1:  sbrs A3, 7
    rjmp 2f
    sub  C2, B0
    sbc  C3, B1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl  GUARD
    rol  C0
    rol  C1
    rol  C2
    rol  C3
    ;; Round last digit
    lsl  GUARD
    adc  C0, __zero_reg__
    adc  C1, __zero_reg__
    adc  C2, __zero_reg__
    adc  C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3

#else /* __AVR_HAVE_MUL__ */

#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 22
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

;; __tmp_reg__
#define CC0 0
;; __zero_reg__
#define CC1 1
#define CC2 16
#define CC3 17

#define AA0 26
#define AA1 AA0+1
#define AA2 30
#define AA3 AA2+1

#if defined (L_mulsa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    push    B0
    push    B1
    push    B3
    clt
    XCALL   __mulusa3_round
    pop     r30
    ;; sign-extend B
    bst     r30, 7
    brtc    1f
    ;; A1, A0 survived in R27:R26
    sub     C2, AA0
    sbc     C3, AA1
1:
    pop     AA1  ;; B1
    pop     AA0  ;; B0

    ;; sign-extend A.  A3 survived in R31
    bst     AA3, 7
    brtc    2f
    sub     C2, AA0
    sbc     C3, AA1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl  GUARD
    rol  C0
    rol  C1
    rol  C2
    rol  C3
    ;; Round last digit
    lsl  GUARD
    adc  C0, __zero_reg__
    adc  C1, __zero_reg__
    adc  C2, __zero_reg__
    adc  C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */

#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
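    ;; Classic shift-and-add:  the first loop consumes the integral
    ;; bits of B (B3:B2, shifted right), adding A << n to the result;
    ;; the second consumes the fractional bits (B1:B0, shifted left),
    ;; adding A >> n, with B2/B3 collecting the guard bits that fall
    ;; below the LSB.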
    push    CC2
    push    CC3
    ; clear result
    clr     __tmp_reg__
    wmov    CC2, CC0
    ; save multiplicand
    wmov    AA0, A0
    wmov    AA2, A2
    rjmp    3f

    ;; Loop the integral part

1:  ;; CC += A * 2^n;  n >= 0
    add  CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3

2:  ;; A <<= 1
    lsl  A0      $  rol A1      $  rol A2      $  rol A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr  B3
    ror  B2
    brcs 1b
    sbci B3, 0
    brne 2b

    ;; Loop the fractional part
    ;; B2/B3 is 0 now, use as guard bits for rounding
    ;; Restore multiplicand
    wmov A0, AA0
    wmov A2, AA2
    rjmp 5f

4:  ;; CC += A:Guard * 2^n;  n < 0
    add  B3,B2  $  adc CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
5:
    ;; A:Guard >>= 1
    lsr  A3  $  ror A2  $  ror A1  $  ror A0  $  ror B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl  B0
    rol  B1
    brcs 4b
    sbci B0, 0
    brne 5b

    ;; Save guard bits and set carry for rounding
    push B3
    lsl  B3
    ;; Move result into place
    wmov C2, CC2
    wmov C0, CC0
    clr  __zero_reg__
    brtc 6f
    ;; Round iff T = 1
    adc  C0, __zero_reg__
    adc  C1, __zero_reg__
    adc  C2, __zero_reg__
    adc  C3, __zero_reg__
6:
    pop  GUARD
    ;; Epilogue
    pop  CC3
    pop  CC2
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* __AVR_HAVE_MUL__ */

#undef GUARD

/*******************************************************
    Fractional Division 8 / 8
*******************************************************/

#define r_divd  r25     /* dividend */
#define r_quo   r24     /* quotient */
#define r_div   r22     /* divisor */
#define r_sign  __tmp_reg__

#if defined (L_divqq3)
DEFUN __divqq3
    mov     r_sign, r_divd
    eor     r_sign, r_div
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
    XCALL   __divqq_helper
    lsr     r_quo           ; adjust to 7 fractional bits
    sbrc    r_sign, 7       ; negate result if needed
    neg     r_quo
    ret
ENDF __divqq3
#endif  /* L_divqq3 */

#if defined (L_udivuqq3)
DEFUN __udivuqq3
    cp      r_divd, r_div
    brsh    0f
    XJMP    __divqq_helper
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
0:  ldi     r_quo, 0xff
    ret
ENDF __udivuqq3
#endif  /* L_udivuqq3 */


#if defined (L_divqq_helper)
DEFUN __divqq_helper
    clr     r_quo           ; clear quotient
    inc     __zero_reg__    ; init loop counter, used per shift
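    ;; (__zero_reg__ now holds a single set bit that is shifted left
    ;; once per iteration; after 8 shifts it is zero again, ending the
    ;; loop and restoring the register's all-zero invariant.)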
__udivuqq3_loop:
    lsl     r_divd          ; shift dividend
    brcs    0f              ; dividend overflow
    cp      r_divd,r_div    ; compare dividend & divisor
    brcc    0f              ; dividend >= divisor
    rol     r_quo           ; shift quotient (with CARRY)
    rjmp    __udivuqq3_cont
0:
    sub     r_divd,r_div    ; restore dividend
    lsl     r_quo           ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl     __zero_reg__    ; shift loop-counter bit
    brne    __udivuqq3_loop
    com     r_quo           ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif  /* L_divqq_helper */

#undef r_divd
#undef r_quo
#undef r_div
#undef r_sign


/*******************************************************
    Fractional Division 16 / 16
*******************************************************/

#define r_divdL 26     /* dividend Low */
#define r_divdH 27     /* dividend High */
#define r_quoL  24     /* quotient Low */
#define r_quoH  25     /* quotient High */
#define r_divL  22     /* divisor Low */
#define r_divH  23     /* divisor High */
#define r_cnt   21

#if defined (L_divhq3)
DEFUN __divhq3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    cp      r_divdL, r_divL
    cpc     r_divdH, r_divH
    breq    __divhq3_minus1 ; if equal return -1
    XCALL   __udivuhq3
    lsr     r_quoH          ; adjust to 15 fractional bits
    ror     r_quoL
    brpl    9f
    ;; negate result if needed
    NEG2    r_quoL
9:
    ret
__divhq3_minus1:
    ldi     r_quoH, 0x80
    clr     r_quoL
    ret
ENDF __divhq3
#endif  /* defined (L_divhq3) */

#if defined (L_udivuhq3)
DEFUN __udivuhq3
    sub     r_quoH,r_quoH   ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

DEFUN __udivuha3_common
    clr     r_quoL          ; clear quotient
    ldi     r_cnt,16        ; init loop counter
__udivuhq3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    brcs    __udivuhq3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    brcc    __udivuhq3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivuhq3_cont
__udivuhq3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol     r_quoH          ; shift quotient
    dec     r_cnt           ; decrement loop counter
    brne    __udivuhq3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif  /* defined (L_udivuhq3) */

/*******************************************************
    Fixed Division 8.8 / 8.8
*******************************************************/

#if defined (L_divha3)
DEFUN __divha3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    XCALL   __udivuha3
    lsr     r_quoH          ; adjust to 7 fractional bits
    ror     r_quoL
    sbrs    r0, 7           ; negate result if needed
    ret
    NEG2    r_quoL
    ret
ENDF __divha3
#endif  /* defined (L_divha3) */

#if defined (L_udivuha3)
DEFUN __udivuha3
    mov     r_quoH, r_divdL
    mov     r_divdL, r_divdH
    clr     r_divdH
    lsl     r_quoH          ; shift quotient into carry
    XJMP    __udivuha3_common ; same as fractional after rearrange
ENDF __udivuha3
#endif  /* defined (L_udivuha3) */

#undef r_divdL
#undef r_divdH
#undef r_quoL
#undef r_quoH
#undef r_divL
#undef r_divH
#undef r_cnt

/*******************************************************
    Fixed Division 16.16 / 16.16
*******************************************************/

#define r_arg1L  24    /* arg1 gets passed already in place */
#define r_arg1H  25
#define r_arg1HL 26
#define r_arg1HH 27
#define r_divdL  26    /* dividend Low */
#define r_divdH  27
#define r_divdHL 30
#define r_divdHH 31    /* dividend High */
#define r_quoL   22    /* quotient Low */
#define r_quoH   23
#define r_quoHL  24
#define r_quoHH  25    /* quotient High */
#define r_divL   18    /* divisor Low */
#define r_divH   19
#define r_divHL  20
#define r_divHH  21    /* divisor High */
#define r_cnt __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_divsa3)
DEFUN __divsa3
    mov     r0, r_arg1HH
    eor     r0, r_divHH
    sbrs    r_divHH, 7
    rjmp    1f
    NEG4    r_divL
1:
    sbrs    r_arg1HH, 7
    rjmp    2f
    NEG4    r_arg1L
2:
    XCALL   __udivusa3
    lsr     r_quoHH         ; adjust to 15 fractional bits
    ror     r_quoHL
    ror     r_quoH
    ror     r_quoL
    sbrs    r0, 7           ; negate result if needed
    ret
    ;; negate the 32-bit quotient in r25:r22
    XJMP    __negsi2
ENDF __divsa3
#endif  /* defined (L_divsa3) */

#if defined (L_udivusa3)
DEFUN __udivusa3
    ldi     r_divdHL, 32    ; init loop counter
    mov     r_cnt, r_divdHL
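    ;; (r_cnt is __zero_reg__ = r1, which LDI cannot address, hence
    ;; the detour through the upper register r_divdHL)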
    clr     r_divdHL
    clr     r_divdHH
    wmov    r_quoL, r_divdHL
    lsl     r_quoHL         ; shift quotient into carry
    rol     r_quoHH
__udivusa3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    rol     r_divdHL
    rol     r_divdHH
    brcs    __udivusa3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    cpc     r_divdHL,r_divHL
    cpc     r_divdHH,r_divHH
    brcc    __udivusa3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivusa3_cont
__udivusa3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    sbc     r_divdHL,r_divHL
    sbc     r_divdHH,r_divHH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivusa3_cont:
    rol     r_quoH          ; shift quotient
    rol     r_quoHL
    rol     r_quoHH
    dec     r_cnt           ; decrement loop counter
    brne    __udivusa3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    com     r_quoHL
    com     r_quoHH
    ret
ENDF __udivusa3
#endif /* defined (L_udivusa3) */

#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH
#undef r_divdL
#undef r_divdH
#undef r_divdHL
#undef r_divdHH
#undef r_quoL
#undef r_quoH
#undef r_quoHL
#undef r_quoHH
#undef r_divL
#undef r_divH
#undef r_divHL
#undef r_divHH
#undef r_cnt


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24
#define A1  A0+1

#if defined (L_ssneg_2)
DEFUN __ssneg_2
    NEG2    A0
    brvc    0f
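    ;; Only 0x8000 overflows NEG2; subtracting 1 turns it into 0x7fff,
    ;; the saturated maximum.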
    sbiw    A0, 1
0:  ret
ENDF __ssneg_2
#endif /* L_ssneg_2 */

#if defined (L_ssabs_2)
DEFUN __ssabs_2
    sbrs    A1, 7
    ret
    XJMP    __ssneg_2
ENDF __ssabs_2
#endif /* L_ssabs_2 */

#undef A0
#undef A1


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  22
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3

#if defined (L_ssneg_4)
DEFUN __ssneg_4
    XCALL   __negsi2
    brvc    0f
    ldi     A3, 0x7f
    ldi     A2, 0xff
    ldi     A1, 0xff
    ldi     A0, 0xff
0:  ret
ENDF __ssneg_4
#endif /* L_ssneg_4 */

#if defined (L_ssabs_4)
DEFUN __ssabs_4
    sbrs    A3, 7
    ret
    XJMP    __ssneg_4
ENDF __ssabs_4
#endif /* L_ssabs_4 */

#undef A0
#undef A1
#undef A2
#undef A3


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

#if defined (L_clr_8)
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; Clear Carry and all Bytes
DEFUN __clr_8
    ;; Clear Carry and set Z
    sub     A7, A7
    ;; FALLTHRU
ENDF __clr_8
;; Propagate Carry to all Bytes, Carry unaltered
DEFUN __sbc_8
    sbc     A7, A7
    sbc     A6, A6
    wmov    A4, A6
    wmov    A2, A6
    wmov    A0, A6
    ret
ENDF __sbc_8
#endif /* L_clr_8 */

#if defined (L_ssneg_8)
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

DEFUN __ssneg_8
    XCALL   __negdi2
    brvc    0f
    ;; A[] = 0x7fffffffffffffff
    sec
    XCALL   __sbc_8
    ldi     A7, 0x7f
0:  ret
ENDF __ssneg_8
#endif /* L_ssneg_8 */

#if defined (L_ssabs_8)
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

DEFUN __ssabs_8
    sbrs    A7, 7
    ret
    XJMP    __ssneg_8
ENDF __ssabs_8
#endif /* L_ssabs_8 */

;; Second Argument
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#if defined (L_usadd_8)
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

DEFUN __usadd_8
    XCALL   __adddi3
    brcs    0f
    ret
0:  ;; A[] = 0xffffffffffffffff
    XJMP    __sbc_8
ENDF __usadd_8
#endif /* L_usadd_8 */

#if defined (L_ussub_8)
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

DEFUN __ussub_8
    XCALL   __subdi3
    brcs    0f
    ret
0:  ;; A[] = 0
    XJMP    __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */

#if defined (L_ssadd_8)
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

DEFUN __ssadd_8
    XCALL   __adddi3
    brvc    0f
    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
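    ;; cpi sets Carry iff B7 < 0x80, i.e. iff B is non-negative;
    ;; __sbc_8 propagates that borrow through A (0xff... or 0x00...),
    ;; and the subi flips the sign byte: 0xff -> 0x7f, 0x00 -> 0x80.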
    cpi     B7, 0x80
    XCALL   __sbc_8
    subi    A7, 0x80
0:  ret
ENDF __ssadd_8
#endif /* L_ssadd_8 */

#if defined (L_sssub_8)
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

DEFUN __sssub_8
    XCALL   __subdi3
    brvc    0f
    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
    ldi     A7, 0x7f
    cp      A7, B7
    XCALL   __sbc_8
    subi    A7, 0x80
0:  ret
ENDF __sssub_8
#endif /* L_sssub_8 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7