;; Copyright (C) 2019-2022 Free Software Foundation, Inc.
;;
;; This file is part of LIBF7, which is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.
;;
;; Under Section 7 of GPL version 3, you are granted additional
;; permissions described in the GCC Runtime Library Exception, version
;; 3.1, as published by the Free Software Foundation.
;;
;; You should have received a copy of the GNU General Public License and
;; a copy of the GCC Runtime Library Exception along with this program;
;; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
;; <http://www.gnu.org/licenses/>.
#ifndef __AVR_TINY__

#define ASM_DEFS_HAVE_DEFUN

#include "asm-defs.h"
#include "libf7.h"

#define ZERO __tmp_reg__ == r1 ? 0 : 0 /* placeholder removed */
#undef ZERO
#define ZERO __zero_reg__
#define TMP  __tmp_reg__

;; Map a plain name to its mangled, "_asm"-suffixed libf7 symbol.
#define F7(name) F7_(name##_asm)

;; Call an asm-level libf7 function.
.macro F7call name
    .global F7(\name\())
    XCALL   F7(\name\())
.endm

;; Tail-jump to an asm-level libf7 function.
.macro F7jmp name
    .global F7(\name\())
    XJMP    F7(\name\())
.endm

;; Just for visibility in disassembly.
.macro LLL name
    .global LLL.\name
    LLL.\name:
    nop
.endm

;; Open a function: own text section, global symbol, debug info.
.macro DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .global F7(\name\())
    .func F7(\name\())
    F7(\name\()) :
.endm

;; Close a function opened with DEFUN.
.macro ENDF name
    .size F7(\name\()), . - F7(\name\())
    .endfunc
.endm

;; Additional global entry point inside a DEFUN...ENDF body.
.macro LABEL name
    .global F7(\name\())
    F7(\name\()) :
.endm

;; Like DEFUN, but for a weak, unmangled symbol.
.macro _DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .weak \name
    .type \name, @function
    \name :
.endm

.macro _ENDF name
    .size \name, . - \name
.endm

.macro _LABEL name
    .weak \name
    .type \name, @function
    \name :
.endm

#define F7_NAME(X) F7_(X)

;; Make a weak alias.
.macro ALIAS sym
    .weak \sym
    .type \sym, @function
    \sym:
.endm

;; Make a weak alias if double is 64 bits wide.
.macro DALIAS sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_DOUBLE__ == 8
    ALIAS \sym
#endif
.endm

;; Make a weak alias if long double is 64 bits wide.
.macro LALIAS sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_LONG_DOUBLE__ == 8
    ALIAS \sym
#endif
.endm
109 | ||
;; Byte offsets of the mantissa and the 16-bit exponent inside an f7_t.
#define Off  1
#define Expo (Off + F7_MANT_BYTES)

#ifdef F7MOD_classify_
;; r24 = classify (*Z)
;; NaN -> F7_FLAG_nan
;; INF -> F7_FLAG_inf [ | F7_FLAG_sign ]
;; ==0 -> F7_FLAG_zero
;; ... -> 0 [ | F7_FLAG_sign ]

;; Clobbers: None (no TMP, no T).
DEFUN classify

    ld      r24, Z              ; r24 = Z->flags
    lsr     r24                 ; C = sign bit, r24 = inf/nan bits
    brne    .Lnan_or_inf

    ldd     r24, Z+6+Off        ; MSB of mantissa
    tst     r24
    brpl    0f                  ; MSB clear => not normalized => zero
    sbc     r24, r24            ; r24 = 0xff iff sign (C) was set
    andi    r24, F7_FLAG_sign
    ret

0:  ldi     r24, F7_FLAG_zero
    ret

.Lnan_or_inf:
    rol     r24                 ; shift the sign bit back in
    ret

ENDF classify
#endif /* F7MOD_classify_ */
143 | ||
#ifdef F7MOD_clr_
;; *Z = 0:  clear .flags, all 7 mantissa bytes and both exponent bytes.
DEFUN clr
    std     Z+0, ZERO
    std     Z+0+Off, ZERO
    std     Z+1+Off, ZERO
    std     Z+2+Off, ZERO
    std     Z+3+Off, ZERO
    std     Z+4+Off, ZERO
    std     Z+5+Off, ZERO
    std     Z+6+Off, ZERO
    std     Z+0+Expo, ZERO
    std     Z+1+Expo, ZERO
    ret
ENDF clr

#endif /* F7MOD_clr_ */
160 | ||
#ifdef F7MOD_clz_
;; The libgcc CLZ implementations like __clzsi2 aka. __builtin_clzl are
;; not very well suited for our purpose, so implement our own.

#define ZBITS r26

;; If \reg is non-zero, count its leading zeros bit-wise in .Loop_bit;
;; otherwise account a whole byte (8 zero bits) and fall through.
;; Uses ZERO as scratch; it is restored to 0 at .Ldone.
.macro .test.byte reg
    or      ZERO, \reg
    brne    .Loop_bit
    subi    ZBITS, -8           ; ZBITS += 8
.endm

;; R26 = CLZ (uint64_t R18); CLZ (0) = 64.
;; Unchanged: T
DEFUN clzdi2
    clr     ZBITS
    ;; Catch the common case of normalized .mant for speed-up.
    tst     r25
    brmi    9f
    .test.byte r25
    .test.byte r24
    .test.byte r23
    .test.byte r22
    .test.byte r21
    .test.byte r20
    .test.byte r19
    .test.byte r18
.Ldone:
    clr     ZERO
9:  ret

.Loop_bit:
    lsl     ZERO                ; shift the non-zero byte left...
    brcs    .Ldone              ; ...until its MSbit falls out
    inc     ZBITS
    rjmp    .Loop_bit

ENDF clzdi2
#undef ZBITS
#endif /* F7MOD_clz_ */
200 | ||
#ifdef F7MOD_cmp_mant_
;; Compare the mantissas at X and at Z.
;; r24 = 0 iff they are equal; otherwise r24 encodes the ordering
;; (derived from the borrow of the multi-byte subtract).
;; X and Z are restored on return.
DEFUN cmp_mant

    adiw    X, 6 + Off
    ld r24, X $  ldd TMP, Z+6+Off $  SUB r24, TMP
    brne    .Lunequal

    sbiw    X, 6
    ld r24, X+ $ ldd TMP, Z+0+Off $  SUB r24, TMP
    ld r24, X+ $ ldd TMP, Z+1+Off $  sbc r24, TMP
    ld r24, X+ $ ldd TMP, Z+2+Off $  sbc r24, TMP
    ld r24, X+ $ ldd TMP, Z+3+Off $  sbc r24, TMP
    ld r24, X+ $ ldd TMP, Z+4+Off $  sbc r24, TMP
    ld r24, X+ $ ldd TMP, Z+5+Off $  sbc r24, TMP
    ;; MSBs are already known to be equal
    breq    9f
.Lunequal:
    sbc     r24, r24            ; r24 = 0xff iff borrow (X < Z)
    sbci    r24, -1
9:  sbiw    X, 6 + Off
    ret
ENDF cmp_mant
#endif /* F7MOD_cmp_mant_ */
224 | ||
;; The 8-byte accumulator C[]:  CA is the sub-LSB (rounding) byte,
;; C0...C6 are the 7 mantissa bytes, C6 the most significant.
#define CA 18
#define C0 CA+1
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define C4 C0+4
#define C5 C0+5
#define C6 C0+6
#define Carry r16
#define Flags 18

#ifdef F7MOD_store_
;; Z->flags = CA.
;; Z->mant = C[7].
DEFUN store_mant.with_flags
    st      Z, CA

;; Z->mant = C[7].
LABEL store_mant
    std     Z+0+Off, C0
    std     Z+1+Off, C1
    std     Z+2+Off, C2
    std     Z+3+Off, C3
    std     Z+4+Off, C4
    std     Z+5+Off, C5
    std     Z+6+Off, C6
    ret
ENDF store_mant.with_flags
#endif /* F7MOD_store_ */
254 | ||
#ifdef F7MOD_load_
;; CA = Z->flags
;; C[7] = Z->mant
DEFUN load_mant.with_flags
    ld      CA, Z
    skipnext                    ; skip the "clr CA" below

;; CA = 0
;; C[7] = Z->mant
LABEL load_mant.clr_CA
LABEL load_mant.clr_flags
    clr     CA                  ; May be skipped

;; C[7] = Z->mant
LABEL load_mant
    ldd     C0, Z+0+Off
    ldd     C1, Z+1+Off
    ldd     C2, Z+2+Off
    ldd     C3, Z+3+Off
    ldd     C4, Z+4+Off
    ldd     C5, Z+5+Off
    ldd     C6, Z+6+Off
    ret
ENDF load_mant.with_flags
#endif /* F7MOD_load_ */
280 | ||
#ifdef F7MOD_copy_
;; *Z = *X:  copy all 10 bytes of an f7_t, highest address first.
;; No-op when X == Z.  Uses ZERO as down-counter (restored to 0 by the loop).
DEFUN copy
    cp      XL, ZL
    cpc     XH, ZH
    breq    9f
    adiw    XL, 10
    adiw    ZL, 10
    set
    bld     ZERO, 1
    bld     ZERO, 3             ; ZERO = 0b1010 = 10.
.Loop:
    ld      TMP, -X
    st      -Z, TMP
    dec     ZERO
    brne    .Loop
9:  ret
ENDF copy
#endif /* F7MOD_copy_ */
299 | ||
#ifdef F7MOD_copy_P_
;; *X = *Z:  copy a 10-byte f7_t out of program memory (Z points into flash).
;; X and Z are restored on return.
DEFUN copy_P
    set
    bld     ZERO, 1
    bld     ZERO, 3             ; ZERO = 0b1010 = 10.
.Loop:
#ifdef __AVR_HAVE_LPMX__
    lpm     TMP, Z+
#else
    lpm                         ; TMP = flash[Z]
    adiw    Z, 1
#endif /* Have LPMx */
    st      X+, TMP
    dec     ZERO
    brne    .Loop
    sbiw    X, 10
    sbiw    Z, 10
    ret
ENDF copy_P
#endif /* F7MOD_copy_P_ */
320 | ||
#ifdef F7MOD_copy_mant_
;; Z->mant = X->mant:  copy the 7 mantissa bytes only (offsets 1...7).
;; No-op when X == Z.  X and Z are restored on return.
DEFUN copy_mant
    cp      XL, ZL
    cpc     XH, ZH
    breq    9f
    adiw    XL, 1
    adiw    ZL, 1
    set
    bld     ZERO, 3
    dec     ZERO                ; ZERO = 7
.Loop:
    ld      TMP, X+
    st      Z+, TMP
    dec     ZERO
    brne    .Loop
    sbiw    XL, 8
    sbiw    ZL, 8
9:  ret
ENDF copy_mant
#endif /* F7MOD_copy_mant_ */
341 | ||
342 | ||
#ifdef F7MOD_clr_mant_lsbs_
;; Clear the low r20 bits of the mantissa:  load *r22's mantissa,
;; shift it right by r20 (r16), zero the sub-byte, shift back left,
;; and store the result to *r24.
DEFUN clr_mant_lsbs
    push    r16
    mov     r16, r20            ; r16 = shift count for lshrdi3/ashldi3
    wmov    XL, r24             ; X = destination f7_t*

    wmov    ZL, r22             ; Z = source f7_t*
    F7call  load_mant

    F7call  lshrdi3

    clr     CA                  ; drop the bits shifted out

    F7call  ashldi3

    pop     r16

    wmov    ZL, XL
    F7jmp   store_mant

ENDF clr_mant_lsbs
#endif /* F7MOD_clr_mant_lsbs_ */
365 | ||
366 | ||
#ifdef F7MOD_normalize_with_carry_
;; Z = &f7_t
;; C[] = .mant may be not normalized
;; Carry === r16 = Addend to Z->expo in [-64, 128).
;; Normalize C[], set Flags, and adjust Z->expo.
;; Return CA (after normalization) in TMP.
;; Unchanged: T
#define Addend r17
#define Zbits  r26
#define expL   r26
#define expH   r27
DEFUN normalize_with_carry
    mov     Addend, Carry
    tst     C6
    brmi    .Lshift.0           ; already normalized (MSbit set)
    ;; r26 = CLZ (uint64_t R18)
    F7call  clzdi2
    cpi     Zbits, 64
    breq    .Lclr               ; mantissa is all-zero
    sub     Addend, Zbits
    mov     r16, Zbits

    F7call  ashldi3             ; left-align the mantissa
    ;; Assert (R25.7 == 1)
.Lshift.0:
    mov     TMP, CA
    ld      Flags, Z

    ;; .expo += Addend
    ldd     expL, Z+0+Expo
    ldd     expH, Z+1+Expo
    ;; Sign-extend Addend
    clr     r16
    sbrc    Addend, 7
    com     r16

    ;; exp += (int8_t) Addend, i.e. sign-extend Addend.
    add     expL, Addend
    adc     expH, r16
    brvc    .Lnormal
    tst     r16
    brmi    .Lclr               ; signed underflow => zero
    ;; Overflow
#if F7_HAVE_Inf == 1
    ori     Flags, F7_FLAG_inf
#else
    ldi     Flags, F7_FLAG_nan
#endif /* Have Inf */
    ret

.Lnormal:
    std     Z+0+Expo, expL
    std     Z+1+Expo, expH
    ret

.Lclr:
    ;; Underflow or Zero.
    clr     TMP
    .global __clr_8
    XJMP    __clr_8             ; C[] = 0 and return

LABEL normalize.store_with_flags
    ;; no rounding
    set
    skipnext
LABEL normalize.round.store_with_flags
    ;; with rounding
    clt                         ; skipped ?
LABEL normalize.maybe_round.store_with_flags
    F7call  normalize_with_carry
    ;; We have:
    ;; Z = &f7_t
    ;; X = .expo
    ;; C[] = .mant
    ;; R18 = .flags
    ;; TMP = byte below .mant after normalization
    ;; T = 1 => no rounding.
    brts    .Lstore
    lsl     TMP                 ; C = rounding bit
    adc     C0, ZERO
    brcc    .Lstore
    adc     C1, ZERO            ; propagate the round-up carry
    adc     C2, ZERO
    adc     C3, ZERO
    adc     C4, ZERO
    adc     C5, ZERO
    adc     C6, ZERO
    brcc    .Lstore
    ;; We only come here if C6 overflowed, i.e. C[] is 0 now.
    ;; .mant = 1.0 by restoring the MSbit.
    ror     C6
    ;; .expo += 1 and override the .expo stored during normalize.
    adiw    expL, 1
    std     Z+0+Expo, expL
    std     Z+1+Expo, expH

.Lstore:
    F7call  store_mant.with_flags

    ;; Return the byte below .mant after normalization.
    ;; This is only useful without rounding; the caller will know.
    mov     R24, TMP
    ret
ENDF normalize_with_carry
#endif /* F7MOD_normalize_with_carry_ */
472 | ||
473 | ||
#ifdef F7MOD_normalize_
;; Using above functionality from C.
;; f7_t* normalize (f7_t *cc)
;; Adjusts cc->expo
;; Clears cc->flags
DEFUN normalize
    push    r17
    push    r16
    wmov    ZL, r24
    F7call  load_mant.clr_CA
    clr     Carry               ; no exponent adjustment on top
    st      Z, ZERO             ; .flags = 0
    F7call  normalize.store_with_flags
    wmov    r24, Z              ; return cc
    pop     r16
    pop     r17
    ret
ENDF normalize
#endif /* F7MOD_normalize_ */
493 | ||
494 | ||
#ifdef F7MOD_store_expo_
#define Done  r24
#define expLO r24
#define expHI r25
;; expo == INT16_MAX => *Z = Inf, return Done = true.
;; expo == INT16_MIN => *Z = 0x0, return Done = true.
;; else              => Z->expo = expo, return Done = false.
DEFUN store_expo
    cpi     expHI, 0x80
    cpc     expLO, ZERO
    breq    .Ltiny              ; expo == INT16_MIN
    adiw    expLO, 1
    brvs    .Lhuge              ; expo == INT16_MAX (adiw overflowed)
    sbiw    expLO, 1
    std     Z+0+Expo, expLO
    std     Z+1+Expo, expHI
    ldi     Done, 0
    ret

.Lhuge:
#if F7_HAVE_Inf == 1
    ld      Done, Z
    andi    Done, F7_FLAG_sign  ; keep the sign...
    ori     Done, F7_FLAG_inf   ; ...and set Inf
#else
    ldi     Done, F7_FLAG_nan
#endif /* Have Inf */
    st      Z, Done
    ldi     Done, 1
    ret

.Ltiny:
    ldi     Done, 1
    F7jmp   clr
ENDF store_expo
#endif /* F7MOD_store_expo_ */
531 | ||
532 | ||
#ifdef F7MOD_set_u64_
;; *r16 = (f7_t) C[] where C[] is an int64_t (set_s64) or uint64_t (set_u64).
;; Returns the f7_t* in r24 (and restores r16).
DEFUN set_s64
    set                         ; T = 1: input is signed
    skipnext
    ;; ...
LABEL set_u64
    clt                         ; Skipped?
    wmov    ZL, r16             ; NOTE(review): was "Zl"; normalized to ZL.
    ;; TMP holds .flags.
    clr     TMP
    brtc    .Lnot.negative

    bst     C6, 7
    brtc    .Lnot.negative      ; signed but value is >= 0
    bld     TMP, F7_FLAGNO_sign
    .global __negdi2
    XCALL   __negdi2            ; C[] = -C[]

.Lnot.negative:
    st      Z, TMP
    std     Z+0+Expo, ZERO
    std     Z+1+Expo, ZERO
    ldi     Carry, 63           ; value scaled by 2^63
    F7call  normalize.round.store_with_flags
    wmov    r24, Z
    wmov    r16, Z              ; Unclobber r16.
    ret
ENDF set_s64
#endif /* F7MOD_set_u64_ */
562 | ||
563 | ||
#ifdef F7MOD_to_integer_
#define Mask r26
;; Convert *r24 to a signed integer in C[]; r22 = Mask selects the
;; width (0xf => 16-bit, 0x1f => 32-bit, else 64-bit).
;; NaN => INTxx_MIN; +/-Inf and out-of-range => saturate.
DEFUN to_integer
    wmov    ZL, r24
    mov     Mask, r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp    .Lset_0x8000

    sbrc    Flags, F7_FLAGNO_inf
    rjmp    .Lsaturate

    sbrs    C6, 7
    rjmp    .Lset_0x0000        ; not normalized => zero

    bst     Flags, F7_FLAGNO_sign
    ldd     r27, Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP, Mask
    com     TMP
    and     TMP, r27
    ldd     r27, Z+1+Expo
    tst     r27
    brmi    .Lset_0x0000        ; ...yes: .expo is < 0 => return 0
    or      TMP, r27
    brne    .Lsaturate.T        ; ...yes: .expo > Mask => saturate

    ;; ...no: Shift right to meet .expo = 0.
    PUSH    r16
    ldd     r16, Z+0+Expo
    eor     r16, Mask
    and     r16, Mask           ; r16 = Mask - .expo
    clr     CA
    F7call  lshrdi3
    POP     r16
    tst     C6
    brmi    .Lsaturate.T        ; > INTxx_MAX => saturate

    rcall   .Lround
    brmi    .Lsaturate.T        ; > INTxx_MAX => saturate

    brtc    9f                  ; >= 0 => return
    sbrc    Mask, 5
    .global __negdi2
    XJMP    __negdi2
    sbrc    Mask, 4
    .global __negsi2
    XJMP    __negsi2
    neg     C6
    neg     C5
    sbci    C6, 0
9:  ret

.Lsaturate:
    bst     Flags, F7_FLAGNO_sign
.Lsaturate.T:

#if F7_HAVE_Inf
    brtc    .Lset_0x7fff
    ;; -Inf => return 1 + INTxx_MIN
    mov     ZL, Flags
    .global __clr_8
    XCALL   __clr_8
    ldi     C6, 0x80

    ldi     CA+0, 0x01          ; 64-bit: LSB of the result

    sbrs    Mask, 5
    ldi     CA+4, 0x01          ; 32-bit LSB

    sbrs    Mask, 4
    ldi     CA+6, 0x01          ; 16-bit LSB
    ret

.Lset_0x7fff:
    ;; +Inf => return INTxx_MAX
    sec
    .global __sbc_8
    XCALL   __sbc_8             ; C[] = 0xff...ff
    ldi     C6, 0x7f
    ret
#endif /* F7_HAVE_Inf */

.Lset_0x8000:
    ;; NaN => return INTxx_MIN
    .global __clr_8
    XCALL   __clr_8
    ldi     C6, 0x80
    ret

.Lset_0x0000:
    ;; Small value => return 0x0
    .global __clr_8
    XJMP    __clr_8

.Lround:
    ;; C6.7 is known to be 0 here.
    ;; Return N = 1 iff we have to saturate.
    cpi     Mask, 0xf
    breq    .Lround16
    cpi     Mask, 0x1f
    breq    .Lround32

    ;; For now, no rounding in the 64-bit case.  This rounding
    ;; would have to be integrated into the right-shift.
    cln
    ret

.Lround32:
    rol     C2                  ; C = rounding bit of the 32-bit value
    adc     C3, ZERO
    adc     C4, ZERO
    rjmp    2f

.Lround16:
    rol     C4                  ; C = rounding bit of the 16-bit value
2:  adc     C5, ZERO
    adc     C6, ZERO
    ret
ENDF to_integer
#endif /* F7MOD_to_integer_ */
687 | ||
688 | ||
#ifdef F7MOD_to_unsigned_
#define Mask r26
;; Convert *r24 to an unsigned integer in C[]; r22 = Mask selects the
;; width (0xf => 16-bit, 0x1f => 32-bit, else 64-bit).
;; NaN, +Inf and too-big => UINTxx_MAX; negative or tiny => 0.
DEFUN to_unsigned
    wmov    ZL, r24
    mov     Mask, r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp    .Lset_0xffff

    sbrc    Flags, F7_FLAGNO_sign
    rjmp    .Lset_0x0000        ; negative => 0

    sbrc    Flags, F7_FLAGNO_inf
    rjmp    .Lset_0xffff

    sbrs    C6, 7
    rjmp    .Lset_0x0000        ; not normalized => zero

    ldd     r27, Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP, Mask
    com     TMP
    and     TMP, r27
    ldd     r27, Z+1+Expo
    tst     r27
    brmi    .Lset_0x0000        ; ...yes: .expo is < 0 => return 0
    or      TMP, r27
    brne    .Lset_0xffff        ; ...yes: .expo > Mask => saturate

    ;; ...no: Shift right to meet .expo = 0.
    PUSH    r16
    ldd     r16, Z+0+Expo
    eor     r16, Mask
    and     r16, Mask           ; r16 = Mask - .expo
    clr     CA
    F7call  lshrdi3
    POP     r16

    ;; Rounding
    ;; ??? C6.7 is known to be 0 here.
    cpi     Mask, 0xf
    breq    .Lround16
    cpi     Mask, 0x1f
    breq    .Lround32

    ;; For now, no rounding in the 64-bit case.  This rounding
    ;; would have to be integrated into the right-shift.
    ret

.Lround32:
    rol     C2                  ; C = rounding bit of the 32-bit value
    adc     C3, ZERO
    adc     C4, ZERO
    rjmp    2f

.Lround16:
    rol     C4                  ; C = rounding bit of the 16-bit value
2:  adc     C5, ZERO
    adc     C6, ZERO
    brcs    .Lset_0xffff        ; Rounding overflow => saturate
    ret

.Lset_0xffff:
    ;; return UINTxx_MAX
    sec
    .global __sbc_8
    XJMP    __sbc_8             ; C[] = 0xff...ff

.Lset_0x0000:
    ;; Small value => return 0x0
    .global __clr_8
    XJMP    __clr_8

ENDF to_unsigned
#endif /* F7MOD_to_unsigned_ */
766 | ||
767 | ||
#ifdef F7MOD_addsub_mant_scaled_
;; int8_t f7_addsub_mant_scaled_asm (f7_t *r24, const f7_t *r22, const f7_t *r20,
;;                                   uint8_t r18);
;; R18.0 = 1 : ADD
;; R18.0 = 0 : SUB
;; R18[7..1] : Scale
;; Compute *R24 = *R22 + *R20 >> R18[7..1].

;; The second 8-byte accumulator B[], mirroring CA / C0...C6.
#define BA 10
#define B0 BA+1
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6

DEFUN addsub_mant_scaled
    do_prologue_saves 10

    bst     r18, 0              ;; ADD ?
    lsr     r18
    mov     r16, r18            ; r16 = scale (shift count)

    wmov    ZL, r20
    wmov    YL, r22
    ;; C[] = bb >> shift
    wmov    XL, r24

    F7call  load_mant.clr_CA
    F7call  lshrdi3

    wmov    BA, CA              ; B[] = shifted operand
    wmov    B1, C1
    wmov    B3, C3
    wmov    B5, C5
    wmov    ZL, YL
    F7call  load_mant.clr_CA    ; C[] = *r22

    wmov    ZL, XL              ; Z = result

    brts    .Ladd

    .global __subdi3
    XCALL   __subdi3            ; C[] -= B[]

    breq    .Lzero
    brcc    .Lround
    ;; C = 1: Can underflow happen at all ?
.Lzero:
    F7call  clr
    rjmp    .Lepilogue

.Ladd:
    .global __adddi3
    XCALL   __adddi3            ; C[] += B[]
    brcc    .Lround
    ldi     Carry, 1            ; sum overflowed: shift back in...
    .global __lshrdi3
    XCALL   __lshrdi3
    ori     C6, 1 << 7          ; ...and restore the carried-out MSbit
    skipnext
.Lround:
    clr     Carry               ; skipped?
    F7call  normalize.round.store_with_flags

.Lepilogue:
    do_epilogue_restores 10

ENDF addsub_mant_scaled

#if !defined (__AVR_HAVE_MOVW__) || !defined (__AVR_HAVE_JMP_CALL__)
DEFUN lshrdi3
    .global __lshrdi3
    XJMP    __lshrdi3
ENDF lshrdi3
DEFUN ashldi3
    .global __ashldi3
    XJMP    __ashldi3
ENDF ashldi3
#else

;; Basically just a wrapper around libgcc's __lshrdi3.
DEFUN lshrdi3
    ;; Handle bit 5 of shift offset.
    sbrs    r16, 5
    rjmp    4f
    wmov    CA, C3              ; shift right by 32 via word moves
    wmov    C1, C5
    clr C6 $  clr C5 $  wmov C3, C5
4:
    ;; Handle bit 4 of shift offset.
    sbrs    r16, 4
    rjmp    3f
    wmov    CA, C1              ; shift right by 16
    wmov    C1, C3
    wmov    C3, C5
    clr C6 $  clr C5
3:
    ;; Handle bits 3...0 of shift offset.
    push    r16
    andi    r16, 0xf
    breq    0f

    .global __lshrdi3
    XCALL   __lshrdi3
0:
    pop     r16
    ret
ENDF lshrdi3

;; Basically just a wrapper around libgcc's __ashldi3.
DEFUN ashldi3
    ;; Handle bit 5 of shift offset.
    sbrs    r16, 5
    rjmp    4f
    wmov    C5, C1              ; shift left by 32 via word moves
    wmov    C3, CA
    clr C2 $  clr C1 $  wmov CA, C1
4:
    ;; Handle bit 4 of shift offset.
    sbrs    r16, 4
    rjmp    3f
    wmov    C5, C3              ; shift left by 16
    wmov    C3, C1
    wmov    C1, CA
    clr CA $  clr C0
3:
    ;; Handle bits 3...0 of shift offset.
    push    r16
    andi    r16, 0xf
    breq    0f

    .global __ashldi3
    XCALL   __ashldi3
0:
    pop     r16
    ret
ENDF ashldi3
#endif /* Small device */

#endif /* F7MOD_addsub_mant_scaled_ */
910 | ||
#if defined F7MOD_mul_mant_ && defined (__AVR_HAVE_MUL__)
;; Multiplicand A[] = mantissa of *r22.
#define A0 11
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6

;; Scratch word pairs for partial products.
#define TT0 26
#define TT1 TT0+1
#define TT2 28
#define TT3 TT2+1

;; Current byte of the second mantissa (*r20).
#define BB 10

;; R18.0 = 1: No rounding.
;; *r24 = mant(*r22) * mant(*r20), accumulated byte-by-byte in C[].
DEFUN mul_mant
    do_prologue_saves 10
    bst     r18, 0              ; T = "no rounding"
    push    r25
    push    r24                 ; save result pointer
    movw    ZL, r22
    LDD     A0, Z+0+Off
    LDD     A1, Z+1+Off
    LDD     A2, Z+2+Off
    LDD     A3, Z+3+Off
    LDD     A4, Z+4+Off
    LDD     A5, Z+5+Off
    LDD     A6, Z+6+Off
    movw    ZL, r20

    ;; 6 * 6 -> 6:5
    ;; 4 * 6 -> 4:3
    ;; 2 * 6 -> 2:1
    ;; 0 * 6 -> 0:a
    ldd     BB, Z+6+Off
    mul A6, BB $  movw C5, r0
    mul A4, BB $  movw C3, r0
    mul A2, BB $  movw C1, r0
    mul A0, BB $  movw CA, r0

    ;; 5 * 6 -> 5:4
    ;; 3 * 6 -> 3:2
    ;; 1 * 6 -> 1:0
    mul A5, BB $  movw TT2, r0
    mul A3, BB $  movw TT0, r0
    mul A1, BB
    ADD C0, r0  $ adc C1, r1
    adc C2, TT0 $ adc C3, TT1
    adc C4, TT2 $ adc C5, TT3 $ clr ZERO
    adc C6, ZERO
    ;; Done B6

    ;; 3 * 3 -> 0:a
    ;; 4 * 4 -> 2:1
    ;; 5 * 5 -> 4:3
    ldd BB, Z+3+Off $ mul A3, BB $ movw TT0, r0
    ldd BB, Z+4+Off $ mul A4, BB $ movw TT2, r0
    ldd BB, Z+5+Off $ mul A5, BB

    ADD CA, TT0 $ adc C0, TT1
    adc C1, TT2 $ adc C2, TT3
    adc C3, r0  $ adc C4, r1
    brcc    .+2
    adiw    C5, 1               ; propagate carry into C5:C6

    ;; 6 * 5 -> 5:4
    ;; 4 * 5 -> 3:2
    ;; 2 * 5 -> 1:0
    ;; 0 * 5 -> a:-
    mul     A0, BB
    ;; A0 done
#define Atmp A0

    mov     Atmp, r1
    mul A6, BB $  movw TT2, r0
    mul A4, BB $  movw TT0, r0
    mul A2, BB

    ADD CA, Atmp
    adc C0, r0  $ adc C1, r1
    adc C2, TT0 $ adc C3, TT1
    adc C4, TT2 $ adc C5, TT3 $ clr ZERO
    adc C6, ZERO

    ;; 1 * 5 -> 0:a
    ;; 3 * 5 -> 2:1
    ;; 6 * 4 -> 4:3
    mul A1, BB $  movw TT0, r0
    mul A3, BB $  movw TT2, r0
    ldd     BB, Z+4+Off
    mul     A6, BB

    ADD CA, TT0 $ adc C0, TT1
    adc C1, TT2 $ adc C2, TT3
    adc C3, r0  $ adc C4, r1 $ clr ZERO
    adc C5, ZERO $ adc C6, ZERO
    ;; B5 done

    ;; 6 * 3 -> 3:2
    ;; 6 * 1 -> 1:0
    ;; 4 * 1 -> a:-
    mov TT0, A6 $ ldd TMP, Z+3+Off
    mov BB, A4  $ ldd Atmp, Z+1+Off
    rcall   .Lmul.help.3

    ;; 5 * 4 -> 3:2
    ;; 5 * 2 -> 1:0
    ;; 3 * 2 -> a:-
    mov TT0, A5 $ ldd TMP, Z+4+Off
    mov BB, A3  $ ldd Atmp, Z+2+Off
    rcall   .Lmul.help.3

    ;; 4 * - -> 3:2 (=0)
    ;; 4 * 3 -> 1:0
    ;; 2 * 3 -> a:-
    mov TT0, A4 $ clr TMP
    mov BB, A2  $ ldd Atmp, Z+3+Off
    rcall   .Lmul.help.3

    ;; 3 * . -> 3:2 (=0)
    ;; 3 * 4 -> 1:0
    ;; 1 * 4 -> a:-
    mov TT0, A3 $ clr TMP
    mov BB, A1  $ ldd Atmp, Z+4+Off
    rcall   .Lmul.help.3

    ;; . * ? -> 3:2 (=0)
    ;; . * 0 -> 1:0 (=0)
    ;; 5 * 0 -> a:-
    clr     TT0
    mov BB, A5  $ ldd Atmp, Z+0+Off
    rcall   .Lmul.help.3

    clr     TT3                 ;; Asserted by .Lmul.help.2
    ;; 6 * 2 -> 2:1
    ;; 6 * 0 -> 0:a
                $ ldd TMP, Z+2+Off
    mov BB, A6  ;$ ldd Atmp, Z+0+Off
    rcall   .Lmul.help.2

    ;; 5 * 3 -> 2:1
    ;; 5 * 1 -> 0:a
                $ ldd TMP, Z+3+Off
    mov BB, A5  $ ldd Atmp, Z+1+Off
    rcall   .Lmul.help.2

    ;; 4 * . -> 2:1 (=0)
    ;; 4 * 2 -> 0:a
                $ clr TMP
    mov BB, A4  $ ldd Atmp, Z+2+Off
    rcall   .Lmul.help.2

    ;; 2 * . -> 2:1 (=0)
    ;; 2 * 4 -> 0:a
                $ clr TMP
    mov BB, A2  $ ldd Atmp, Z+4+Off
    rcall   .Lmul.help.2

    ;; Finally...

    pop     ZL                  ; restore result pointer (pushed as r24/r25)
    pop     ZH
    ;; The high byte is at least 0x40 and at most 0xfe.
    ;; The result has to be left-shifted by one in order to scale it
    ;; correctly.

    ldi     Carry, 1
    F7call  normalize.maybe_round.store_with_flags

    do_epilogue_restores 10

;; TT0 * Tmp  -> 3:2
;; TT0 * Atmp -> 1:0
;; BB  * Atmp -> a:-
;;
;; Clobbers : TMP, TT0...TT3.
;; Sets     : ZERO = 0.
.Lmul.help.3:
    mul TT0, TMP  $ movw TT2, r0
    mul TT0, Atmp $ movw TT0, r0
    mul BB, Atmp

    ADD CA, r1
    adc C0, TT0 $ adc C1, TT1
    adc C2, TT2
.Lmul.help.3.C3: $ adc C3, TT3 $ clr ZERO
    adc C4, ZERO $ adc C5, ZERO
    adc C6, ZERO
    ret

;; BB * TMP  -> 2:1
;; BB * Atmp -> 0:a
;;
;; Asserts  : TT3 = 0
;; Clobbers : TMP, TT0, TT1.
;; Sets     : ZERO = 0.
.Lmul.help.2:
    mul BB, TMP $  movw TT0, r0
    mul BB, Atmp
    ADD CA, r0  $ adc C0, r1
    adc C1, TT0 $ adc C2, TT1
    rjmp    .Lmul.help.3.C3

ENDF mul_mant
#endif /* F7MOD_mul_mant_ && MUL */
1119 | ||
1120 | ||
#if defined (F7MOD_div_)

;; Dividend is C[]

;; Divisor
#define A0 9
#define A1 10
#define A2 11
#define A3 12
#define A4 13
#define A5 14
#define A6 15

;; Quotient
#define Q0 0        /* === TMP */
#define Q1 Q0+1     /* === ZERO */
#define Q2 26
#define Q3 Q2+1
#define Q4 28
#define Q5 Q4+1
#define Q6 16
#define Q7 Q6+1

#define Cnt   CA
#define QBits r8

;; Restoring binary long division: mant(*r24) / mant(*r22), r20 = number
;; of quotient bits to compute.
DEFUN div
    do_prologue_saves 12

    ;; Number of bits requested for the quotient.
    ;; This is usually 2 + F7_MANT_BITS.
    mov     QBits, r20
    wmov    ZL, r22
    LDD     A0, Z+0+Off
    LDD     A1, Z+1+Off
    LDD     A2, Z+2+Off
    LDD     A3, Z+3+Off
    LDD     A4, Z+4+Off
    LDD     A5, Z+5+Off
    LDD     A6, Z+6+Off
    wmov    ZL, r24
    F7call  load_mant

    ;; Clear quotient Q[].
    clr     Q0                  ; === TMP
    ;clr    Q1                  ; === ZERO
    wmov    Q2, Q0
    wmov    Q4, Q0
    wmov    Q6, Q0

    ;; C[] and A[] are valid mantissae, i.e. their MSBit is set.  Therefore,
    ;; quotient Q[] will be in [0x0.ff..., 0x0.40...] and to adjust Q[] we
    ;; need at most 1 left-shift.  Compute F7_MANT_BITS + 2 bits of the
    ;; quotient:  One bit is used for rounding, and one bit might be consumed
    ;; by the mentioned left-shift.
    mov     Cnt, QBits
    rjmp    .Loop_start

.Loop:
    ;; Shift dividend.
    LSL     C0
    rol     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    rol     C6
    brcs    .Lfits              ; shifted-out bit => divisor fits for sure
    ;; Compare dividend against divisor.
.Loop_start:
    CP      C0, A0
    cpc     C1, A1
    cpc     C2, A2
    cpc     C3, A3
    cpc     C4, A4
    cpc     C5, A5
    cpc     C6, A6
    ;; Shift 0 into quotient.
    brlo    1f
.Lfits:
    ;; Divisor fits into dividend.
    SUB     C0, A0
    sbc     C1, A1
    sbc     C2, A2
    sbc     C3, A3
    sbc     C4, A4
    sbc     C5, A5
    sbc     C6, A6
    ;; Shift 1 into quotient.
    sec
    rol     Q0
    skipnext
1:  lsl     Q0
    rol     Q1
    rol     Q2
    rol     Q3
    rol     Q4
    rol     Q5
    rol     Q6
    rol     Q7
    dec     Cnt
    brne    .Loop

    wmov    CA, Q0              ; move quotient into C[]
    wmov    C1, Q2
    wmov    C3, Q4
    wmov    C5, Q6
    clr     ZERO

    ldi     Carry, 64
    sub     Carry, QBits        ; exponent adjustment for the scaling above
    F7call  normalize.round.store_with_flags

    do_epilogue_restores 12
ENDF div

#endif /* F7MOD_div_ */
1238 | ||
1239 | ||
1240 | #if defined (F7MOD_sqrt16_) && defined (__AVR_HAVE_MUL__) | |
1241 | ||
1242 | #define Mask C6 | |
1243 | #define Q0 C3 /* = R22 */ | |
1244 | #define Q1 C4 /* = R23 */ | |
1245 | ||
1246 | ;; uint16_t R24 = sqrt16_XXX (uint16_t R24); | |
1247 | ;; Clobbers: R22, R23, TMP. | |
1248 | ;; | |
1249 | ;; XXX = floor: Return integral part of square-root of R25:R24 with R25 = 0. | |
1250 | ;; Error is in [0, -1 LSB). | |
;; XXX = round: Return square-root of R25:R24 rounded to nearest integer.
1252 | ;; R25 = (Q[] >= 65281) = (Q > 0xff00), i.e. if Q[] is not | |
1253 | ;; bigger than 0xff00, then the result fits in 8 bits. | |
1254 | ;; Return C = 0 if the result is the same as for XXX = floor, | |
1255 | ;; error in [0, -1/2 LSB) | |
1256 | ;; Return C = 1 if the result is one higher than for XXX = floor, | |
1257 | ;; error in [1/2 LSB, 0). | |
DEFUN sqrt16_round
	;; T = 1 requests rounding of the result to nearest.
	set
	skipnext
	;; skipnext jumps over the clt below, so the "round" entry keeps T = 1.
LABEL sqrt16_floor
	clt				; Skipped for sqrt16_round; T = 0 => floor.
	;; Q[] = R25:R24 is the radicand; C5 accumulates the root bit by bit.
	movw	Q0, r24
	clr	C5
	ldi	Mask, 1 << 7

	;; Binary search from the MSBit down: tentatively set the Mask bit in
	;; the root candidate C5, square it (result in R1:R0), and keep the bit
	;; only if the square still fits into the radicand Q[].
.Loop_mask:
	add	C5, Mask
	mul	C5, C5			; R1:R0 = C5 * C5; clobbers __zero_reg__ (R1).
	cp	Q0, R0
	cpc	Q1, R1
	brsh	1f			; Square fits: keep the trial bit.
	sub	C5, Mask		; Square too big: clear the trial bit again.
1:	lsr	Mask
	brne	.Loop_mask
	;; Here Mask = 0, hence C6 = 0 (Mask is an alias for C6).

	brtc	.Ldone			; No rounding => C6 will be 0.

	;; Rounding: (X + 1/2)^2 = X^2 + X + 1/4, thus probing
	;; for bit -1 is testing Q[] against C5^2 + C5.
	mul	C5, C5			; R1:R0 = C5^2 (again clobbers R1).
	add	R0, C5
	adc	R1, C6			; Exploit C6 === Mask = 0.
	cp	R0, Q0
	cpc	R1, Q1
	brcc	.Ldone			; C5^2 + C5 > Q[] => floor result, C = 0.
	;; If C5^2 + C5 + 1/4 fits into Q[], then round up and C = 1.
	adiw	C5, 1			; Exploit C6 === Mask = 0.
	sec

.Ldone:
	;; MUL above trashed R1; restore the ABI-mandated zero register.
	clr	__zero_reg__
	ret
ENDF sqrt16_round
1296 | #undef Mask | |
1297 | #undef Q0 | |
1298 | #undef Q1 | |
1299 | #endif /* F7MOD_sqrt16_ && MUL */ | |
1300 | ||
1301 | #ifdef F7MOD_sqrt_approx_ | |
DEFUN sqrt_approx
	push	r17
	push	r16
	;; X = cc (result pointer, from R25:R24), Z = aa (input, from R23:R22).
	wmov	XL, r24
	wmov	ZL, r22

	;; C[] = 0.
	.global __clr_8
	XCALL	__clr_8

	;; Only the two most significant mantissa bytes of the input feed the
	;; 16-bit integer square root below.
	ldd	C5, Z+5+Off
	ldd	C6, Z+6+Off

	ldd	Carry, Z+0+Expo
	ldd	TMP, Z+1+Expo
	wmov	ZL, XL

	;; Clear byte 0 of the result (presumably the flags byte — the other
	;; fields are written explicitly below; TODO confirm against f7_t layout).
	st	Z, ZERO

	;; Halve the 16-bit exponent arithmetically; the bit shifted out into
	;; C tells us whether .expo was odd.
	asr	TMP
	ror	Carry
	std	Z+1+Expo, TMP
	std	Z+0+Expo, Carry

	;; Re-interpreting our Q-format 1.xx mantissa as Q2.yy, we have to shift
	;; the mantissa to the right by 1.  As we need an even exponent, multiply
	;; the mantissa by 2 for odd exponents, i.e. only right-shift if .expo
	;; is even.

	brcs	1f			; C = 1 <=> .expo was odd: skip the shift.
	lsr	C6
	ror	C5

1:
	;; 16-bit integer square root of C6:C5 (in R25:R24); result in R24.
	F7call	sqrt16_round

	;; sqrt16_round() returns:  C = 0:  error in [0, -1/2 LSB).
	;;                          C = 1:  error in [1/2 LSB, 0)

	brcc	2f
	;; Undo the round-up from sqrt16_round(); this will transform to
	;; error in [-1/2 LSB, -1 LSB).
	sbiw	C5, 1
	;; Together with the correct bit C4.7, the error is in [0, -1/2 LSB).
	ori	C4, 1 << 7

2:	;; Setting C4.6 adds 1/4 LSB and the error is now in [1/4 LSB, -1/4 LSB)
	;; in either case.
	ori	C4, 1 << 6

	;; sqrt16_round() runs on integers which means that it computes the
	;; square root of mant * 2^14 if we regard mant as Q-format 2.yy,
	;; i.e. 2 integral bits.  The result is sqrt(mant) * 2^7,
	;; and in order to get the same scaling like the input, .expo has to
	;; be adjusted by 7.
	;; NOTE(review): the adjust passed below is 8, one more than the 7
	;; derived above (the original source flagged this with "???").
	;; Presumably normalize accounts for the remaining bit — TODO confirm.

	ldi	Carry, 8
	F7call	normalize.store_with_flags

	pop	r16
	pop	r17
	ret

ENDF sqrt_approx
1367 | #endif /* F7MOD_sqrt_approx_ */ | |
1368 | ||
1369 | ||
1370 | #undef CA | |
1371 | #undef C0 | |
1372 | #undef C1 | |
1373 | #undef C2 | |
1374 | #undef C3 | |
1375 | #undef C4 | |
1376 | #undef C5 | |
1377 | #undef C6 | |
1378 | #undef Carry | |
1379 | ||
1380 | ||
1381 | #ifdef F7MOD_D_fabs_ | |
_DEFUN __fabs
	DALIAS fabs
	LALIAS fabsl
	;; double fabs (double A): clear the IEEE-754 sign bit, which lives in
	;; bit 7 of the high byte R25.  Exponent and mantissa pass through.
	cbr	R25, 1 << 7		; CBR Rd,K == ANDI Rd,~K — same instruction.
	ret
_ENDF __fabs
1388 | #endif /* F7MOD_D_fabs_ */ | |
1389 | ||
1390 | ||
1391 | #ifdef F7MOD_D_neg_ | |
_DEFUN __neg
_LABEL __negdf2
	;; double __neg (double A): flip the IEEE-754 sign bit.  Subtracting
	;; 0x80 from the high byte toggles bit 7 and leaves bits 0..6 unchanged
	;; (mod-256 arithmetic), i.e. it acts like R25 ^= 0x80.
	subi	R25, 0b10000000
	ret
_ENDF __neg
1397 | #endif /* F7MOD_D_neg_ */ | |
1398 | ||
1399 | ||
1400 | #ifdef F7MOD_D_signbit_ | |
_DEFUN __signbit
	DALIAS signbit
	LALIAS signbitl
	;; int signbit (double A): return R25:R24 = 1 if the sign bit (bit 7
	;; of R25) is set, else 0.
	lsl	R25			; C = sign bit.
	clr	R24			; CLR is EOR: does not touch the C flag.
	clr	R25			; High byte of the int result is 0.
	adc	R24, R25		; R24 = 0 + 0 + C = sign bit.
	ret
_ENDF __signbit
1410 | #endif /* F7MOD_D_signbit_ */ | |
1411 | ||
1412 | ||
1413 | #ifdef F7MOD_D_copysign_ | |
_DEFUN __copysign
	DALIAS copysign
	LALIAS copysignl
	;; double copysign (double A, double B): return A with B's sign.
	;; A's high byte is R25, B's high byte is R17 (B arrives in R10..R17).
	andi	R25, 0x7f		; Clear A's sign bit ...
	sbrc	R17, 7			; ... and if B is negative,
	ori	R25, 1 << 7		; set A's sign bit.
	ret
_ENDF __copysign
1421 | #endif /* F7MOD_D_copysign_ */ | |
1422 | ||
1423 | ||
1424 | #ifdef F7MOD_D_isinf_ | |
_DEFUN __isinf
	DALIAS isinf
	LALIAS isinfl
	;; int isinf (double A): classify A, then map class_D's flag protocol
	;; to an int in R25:R24.  class_D returns T = 0 for finite numbers,
	;; and for non-numbers T = 1 with Z distinguishing Inf from NaN.
	F7call	class_D
	;; Inf: T = Z = 1.
	brtc	0f			; T = 0: a finite number => return 0.
	ldi	R24, 1			; LDI does not touch SREG, Z survives.
	breq	1f			; Z = 1: Inf => return 1.
0:
	clr	R24			; NaN or number => return 0.
1:
	clr	R25
	ret
_ENDF __isinf
1439 | #endif /* F7MOD_D_isinf_ */ | |
1440 | ||
1441 | ||
1442 | #ifdef F7MOD_D_isnan_ | |
_DEFUN __isnan
	DALIAS isnan
	LALIAS isnanl
	;; int isnan (double A): classify A, then map class_D's flag protocol
	;; to an int in R25:R24.  class_D returns T = 0 for finite numbers,
	;; and for non-numbers T = 1 with Z distinguishing Inf from NaN.
	F7call	class_D
	;; NaN: T = 1, Z = 0.
	brtc	0f			; T = 0: a finite number => return 0.
	ldi	R24, 1			; LDI does not touch SREG, Z survives.
	brne	1f			; Z = 0: NaN => return 1.
0:
	clr	R24			; Inf or number => return 0.
1:
	clr	R25
	ret
_ENDF __isnan
1457 | #endif /* F7MOD_D_isnan_ */ | |
1458 | ||
1459 | ||
1460 | #ifdef F7MOD_D_isfinite_ | |
_DEFUN __isfinite
	DALIAS isfinite
	LALIAS isfinitel
	;; int isfinite (double A): classify A; class_D returns T = 0 iff A is
	;; an ordinary (finite) number.
	F7call	class_D
	;; Number <=> T = 0.
	bld	R24, 0			; R24.0 = T.
	com	R24			; Invert: bit 0 = !T.
	andi	R24, 1			; Mask everything but bit 0.
	clr	R25
	ret
_ENDF __isfinite
1472 | #endif /* F7MOD_D_isfinite_ */ | |
1473 | ||
1474 | ||
1475 | #ifdef F7MOD_D_class_ | |
1476 | ;; The encoded exponent has 11 Bits. | |
1477 | #define MAX_BIASED_EXPO 0b0111111111110000 | |
1478 | ||
1479 | ;; Classify a double in R18[] | |
1480 | ;; Number: T-Flag = 0. | |
1481 | ;; +-Inf : T-Flag = 1, Z-Flag = 1. | |
1482 | ;; NaN : T-Flag = 1, Z-Flag = 0. | |
;; Classify the double held in R18[] (high word in R25:R24).
;; Returns: Number: T = 0.   +/-Inf: T = 1, Z = 1.   NaN: T = 1, Z = 0.
DEFUN class_D
	;; Copy the high word and isolate the 11-bit biased exponent field.
	wmov	R26, R24
	andi	R26, lo8 (MAX_BIASED_EXPO)
	andi	R27, hi8 (MAX_BIASED_EXPO)
	;; Compare the field against all-ones (the Inf/NaN encoding).
	subi	R26, lo8 (MAX_BIASED_EXPO)
	sbci	R27, hi8 (MAX_BIASED_EXPO)
	clt
	brne	.L.number		; Exponent not all-ones => finite, T = 0.
	set				; Inf or NaN => T = 1.
	;; Set sign and expo to 0.
	clr	R25
	andi	R24, lo8 (~MAX_BIASED_EXPO)
	;; What remains is the mantissa.
	;; Mantissa == 0  =>  +/-Inf.
	;; Mantissa != 0  =>  NaN.
	;; Compare R18[] against sign_extend(R26) with R26 = 0.
	;; Tail-call: __cmpdi2_s8 supplies the Z flag for our caller.
	.global __cmpdi2_s8
	XJMP	__cmpdi2_s8
.L.number:
	ret

ENDF class_D
1505 | #endif /* F7MOD_D_class_ */ | |
1506 | ||
1507 | ||
1508 | #ifdef F7MOD_call_dd_ | |
1509 | ||
1510 | ;; Provide double wrappers for functions that operate on f7_t and get f7_t*. | |
1511 | ;; | |
1512 | ;; We set up a frame of sizeof(f7_t), convert the input double in R18[] to | |
1513 | ;; f7_t in that frame location, then call *Z and finally convert the result f7_t | |
1514 | ;; to double R18[] if that's requested. | |
1515 | ;; | |
1516 | ;; call_dd: double func (double A) | |
1517 | ;; void (*Z) (f7_t *aa, const f7_t *aa) | |
1518 | ;; | |
1519 | ;; call_dx: double func (type_t A) , sizeof(type_t) <= 4 | |
1520 | ;; void (*Z) (f7_t *aa, type_t) | |
1521 | ;; | |
1522 | ;; call_xd: type_t func (double A) | |
1523 | ;; type_t (*Z) (const f7_t *aa) | |
1524 | ;; | |
1525 | ;; call_ddx: double func (double A, word_t) , sizeof (word_t) <= 2 | |
1526 | ;; void (*Z) (f7_t *aa, const f7_t *aa, word_t) | |
1527 | ||
1528 | #define WHAT R13 | |
1529 | ||
;; Trampoline for the double wrappers; the entry point chosen encodes the
;; call signature in WHAT (R13) by letting each entry fall through an
;; "inc ZERO" so that ZERO counts how many entries were passed.
DEFUN call_dd			; WHAT = R13 = 3
	inc	ZERO
LABEL call_xd			; WHAT = R13 = 2
	inc	ZERO
LABEL call_ddx			; WHAT = R13 = 1
	inc	ZERO
LABEL call_dx			; WHAT = R13 = 0
	push	WHAT
	mov	WHAT, ZERO		; WHAT = number of labels fallen through.
	clr	ZERO			; Restore the ABI zero register.
	;; R14/R15 hold Z, the address of the f7_worker function, until we need it.
	push	r14
	push	r15
	wmov	r14, Z

#define n_pushed 4
#define n_frame 10

	;; Reserve a sizeof(f7_t) = 10 byte frame for the converted argument.
	do_prologue_saves n_pushed, n_frame
	;; Y = FramePointer + 1
	adiw	Y, 1
	dec	WHAT
	brmi	.Ldx			; WHAT was initially 0.
	;; FP + 1 = (f7_t) arg1
	wmov	r16, Y
	;; The double argument is in R18[].
	XCALL	F7_NAME (set_double_impl)
	tst	WHAT
	brne	.Lno.ddx		; WHAT was initially != 1.
	;; call_ddx: Set R20/21 to the 2-byte scalar / pointer argument.
	;; Fetch it from where prologue_saves put it.
	ldd	r20, Y + n_frame + 3	; Saved R16
	ldd	r21, Y + n_frame + 2	; Saved R17
.Lno.ddx:
	wmov	r22, Y			; &arg1 (input)
.Ldo.dx:
	wmov	r24, Y			; &arg1 (output)
	;; Invoke the f7_worker function whose address was handed over in Z.
	wmov	Z, r14
	XICALL
	dec	WHAT
	breq	.Lepilogue		; WHAT was initially 2: Return non-double.
	;; Convert the result f7_t back to a double in R18[].
	wmov	r24, Y			; &arg1
	XCALL	F7_NAME (get_double)
.Lepilogue:
	;; + 3 to account for R13...R15 pushed prior to do_prologue_saves.
	do_epilogue_restores n_pushed + 3, n_frame

.Ldx:
	;; call_dx: Copy the 4-byte input scalar from R22[4] to R20[4].
	wmov	r20, r22
	wmov	r22, r24
	rjmp	.Ldo.dx

ENDF call_dd
1584 | #endif /* F7MOD_call_dd_ */ | |
1585 | ||
1586 | ||
1587 | #ifdef F7MOD_call_ddd_ | |
1588 | ||
1589 | ;; Provide double wrappers for functions that operate on f7_t and get f7_t*. | |
1590 | ;; | |
1591 | ;; We set up a frame of 2 * sizeof(f7_t), convert the input doubles in R18[] | |
1592 | ;; and R10[] to f7_t in these frame locations, then call *Z and finally | |
1593 | ;; convert the result f7_t to double R18[] if that's requested. | |
1594 | ;; | |
1595 | ;; call_ddd: double func (double A, double B) | |
1596 | ;; void (*Z) (f7_t *aa, const f7_t *aa, const f7_t *bb) | |
1597 | ;; | |
1598 | ;; call_xdd: type_t func (double A, double B) | |
1599 | ;; type_t (*Z) (const f7_t *aa, const f7_t *bb) | |
1600 | ||
;; Trampoline for the 2-argument double wrappers.  ZERO momentarily encodes
;; which entry was taken (1 = call_ddd, 0 = call_xdd); it is pushed so that
;; call.2 can read it through the argument pointer.
DEFUN call_ddd
	inc	ZERO
LABEL call_xdd
	;; R8/R9 hold Z, the address of the f7_worker function, until we need it.
	push	r9
	push	r8
	wmov	r8, Z
	;; This is an argument to call.2 and will be accessed by the arg pointer.
	push	ZERO
	clr	ZERO			; Restore the ABI zero register.
	rcall	call.2
	pop	TMP			; Discard the pushed selector byte.
	pop	r8
	pop	r9
	ret

#define n_pushed 4
#define n_frame 20

;; Worker: frame holds two f7_t's (2 * 10 bytes); converts both doubles,
;; calls *Z, and converts the result back for call_ddd.
call.2:
	do_prologue_saves n_pushed, n_frame
	;; Y = FramePointer + 1
	adiw	Y, 1
	;; FP + 1 = (f7_t) arg1
	wmov	r16, Y
	;; First double argument is already in R18[].
	XCALL	F7_NAME (set_double_impl)
	;; FP + 11 = (f7_t) arg2
	wmov	r16, Y
	subi	r16, lo8 (-10)		; r16:r17 += 10 via subtract-negative.
	sbci	r17, hi8 (-10)
	;; Move second double argument to R18[].
	wmov	r18, r10
	wmov	r20, r12
	wmov	r22, r14
	;; Get high word of arg2 from where prologue_saves put it.
	ldd	r24, Y + n_frame + 3	; Saved R16
	ldd	r25, Y + n_frame + 2	; Saved R17
	XCALL	F7_NAME (set_double_impl)
	;; Z (f7_t *arg1, const f7_t *arg1, const f7_t *arg2)
	wmov	Z, r8
	wmov	r24, Y			; &arg1
	;; WHAT == 0  =>  call_xdd
	;; WHAT != 0  =>  call_ddd
	;; (Read the byte pushed by the trampoline above, located past the
	;; frame, the saved regs and the return address.)
	ldd	TMP, Y + n_frame + n_pushed + PC_SIZE
	tst	TMP
	breq	.Lxdd
	wmov	r22, Y			; &arg1
	wmov	r20, r16		; &arg2
	XICALL
	;; Convert the f7_t result back to a double in R18[].
	wmov	r24, Y			; &arg1
	XCALL	F7_NAME (get_double)
.Lepilogue:
	do_epilogue_restores n_pushed, n_frame
.Lxdd:
	;; call_xdd: worker returns a non-double; no conversion of the result.
	wmov	r22, r16		; &arg2
	XICALL
	rjmp	.Lepilogue
ENDF call_ddd
1660 | #endif /* F7MOD_call_ddd_ */ | |
1661 | ||
1662 | #include "f7-wraps.h" | |
1663 | ||
1664 | #endif /* !AVR_TINY */ |