]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/sh/lib1funcs.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / sh / lib1funcs.S
CommitLineData
f1717362 1/* Copyright (C) 1994-2016 Free Software Foundation, Inc.
a85a2db0 2
3This file is free software; you can redistribute it and/or modify it
4under the terms of the GNU General Public License as published by the
6bc9506f 5Free Software Foundation; either version 3, or (at your option) any
a85a2db0 6later version.
7
a85a2db0 8This file is distributed in the hope that it will be useful, but
9WITHOUT ANY WARRANTY; without even the implied warranty of
10MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11General Public License for more details.
12
6bc9506f 13Under Section 7 of GPL version 3, you are granted additional
14permissions described in the GCC Runtime Library Exception, version
153.1, as published by the Free Software Foundation.
16
17You should have received a copy of the GNU General Public License and
18a copy of the GCC Runtime Library Exception along with this program;
19see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20<http://www.gnu.org/licenses/>. */
21
a85a2db0 22
7b23765d 23!! libgcc routines for the Renesas / SuperH SH CPUs.
a85a2db0 24!! Contributed by Steve Chamberlain.
25!! sac@cygnus.com
26
30b0bb86 27!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28!! recoded in assembly by Toshiyasu Morita
29!! tm@netcom.com
a85a2db0 30
cea9fc42 31#if defined(__ELF__) && defined(__linux__)
32.section .note.GNU-stack,"",%progbits
33.previous
34#endif
35
d73f1571 36/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
38 amylaar@cygnus.com */
39
2fa59365 40#include "lib1funcs.h"
7105fb72 41
a9cfe83b 42/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43 so it is more convenient to define NO_FPSCR_VALUES here than to
44 define it on the command line. */
45#if defined __vxworks && defined __PIC__
46#define NO_FPSCR_VALUES
47#endif
48
87e19636 49#if ! __SH5__
a85a2db0 50#ifdef L_ashiftrt
16f1dae0 51 .global GLOBAL(ashiftrt_r4_0)
52 .global GLOBAL(ashiftrt_r4_1)
53 .global GLOBAL(ashiftrt_r4_2)
54 .global GLOBAL(ashiftrt_r4_3)
55 .global GLOBAL(ashiftrt_r4_4)
56 .global GLOBAL(ashiftrt_r4_5)
57 .global GLOBAL(ashiftrt_r4_6)
58 .global GLOBAL(ashiftrt_r4_7)
59 .global GLOBAL(ashiftrt_r4_8)
60 .global GLOBAL(ashiftrt_r4_9)
61 .global GLOBAL(ashiftrt_r4_10)
62 .global GLOBAL(ashiftrt_r4_11)
63 .global GLOBAL(ashiftrt_r4_12)
64 .global GLOBAL(ashiftrt_r4_13)
65 .global GLOBAL(ashiftrt_r4_14)
66 .global GLOBAL(ashiftrt_r4_15)
67 .global GLOBAL(ashiftrt_r4_16)
68 .global GLOBAL(ashiftrt_r4_17)
69 .global GLOBAL(ashiftrt_r4_18)
70 .global GLOBAL(ashiftrt_r4_19)
71 .global GLOBAL(ashiftrt_r4_20)
72 .global GLOBAL(ashiftrt_r4_21)
73 .global GLOBAL(ashiftrt_r4_22)
74 .global GLOBAL(ashiftrt_r4_23)
75 .global GLOBAL(ashiftrt_r4_24)
76 .global GLOBAL(ashiftrt_r4_25)
77 .global GLOBAL(ashiftrt_r4_26)
78 .global GLOBAL(ashiftrt_r4_27)
79 .global GLOBAL(ashiftrt_r4_28)
80 .global GLOBAL(ashiftrt_r4_29)
81 .global GLOBAL(ashiftrt_r4_30)
82 .global GLOBAL(ashiftrt_r4_31)
83 .global GLOBAL(ashiftrt_r4_32)
a85a2db0 84
59312820 85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
117 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
805e22b2 118
/* ashiftrt_r4_N (N = 0..32): arithmetic right shift of r4 by the constant
   N, done in place in r4.  Every label is a separate entry point; an entry
   falls through into the code for smaller counts, and the instruction
   written after each rts executes in that rts's delay slot.  */
30b0bb86 119 .align 1
/* N = 31 and N = 32 share one body: rotcl moves the sign bit of r4 into T,
   then subc computes r4 - r4 - T, leaving 0 or -1 in r4.  */
16f1dae0 120GLOBAL(ashiftrt_r4_32):
121GLOBAL(ashiftrt_r4_31):
30b0bb86 122 rotcl r4
123 rts
124 subc r4,r4
125
/* N = 25..30: (N - 24) single-bit shar steps, then the shift-by-24 tail:
   a 24-bit logical shift (shlr16 + shlr8) whose low byte is sign-extended
   by exts.b in the rts delay slot.  */
16f1dae0 126GLOBAL(ashiftrt_r4_30):
a85a2db0 127 shar r4
16f1dae0 128GLOBAL(ashiftrt_r4_29):
a85a2db0 129 shar r4
16f1dae0 130GLOBAL(ashiftrt_r4_28):
a85a2db0 131 shar r4
16f1dae0 132GLOBAL(ashiftrt_r4_27):
a85a2db0 133 shar r4
16f1dae0 134GLOBAL(ashiftrt_r4_26):
a85a2db0 135 shar r4
16f1dae0 136GLOBAL(ashiftrt_r4_25):
a85a2db0 137 shar r4
16f1dae0 138GLOBAL(ashiftrt_r4_24):
30b0bb86 139 shlr16 r4
140 shlr8 r4
141 rts
142 exts.b r4,r4
143
/* N = 17..23: (N - 16) single-bit shar steps, then the shift-by-16 tail:
   shlr16 followed by exts.w (sign-extend the low word) in the delay slot.  */
16f1dae0 144GLOBAL(ashiftrt_r4_23):
a85a2db0 145 shar r4
16f1dae0 146GLOBAL(ashiftrt_r4_22):
a85a2db0 147 shar r4
16f1dae0 148GLOBAL(ashiftrt_r4_21):
a85a2db0 149 shar r4
16f1dae0 150GLOBAL(ashiftrt_r4_20):
a85a2db0 151 shar r4
16f1dae0 152GLOBAL(ashiftrt_r4_19):
a85a2db0 153 shar r4
16f1dae0 154GLOBAL(ashiftrt_r4_18):
a85a2db0 155 shar r4
16f1dae0 156GLOBAL(ashiftrt_r4_17):
a85a2db0 157 shar r4
16f1dae0 158GLOBAL(ashiftrt_r4_16):
30b0bb86 159 shlr16 r4
160 rts
161 exts.w r4,r4
162
/* N = 1..15: a chain of single-bit shar steps.  Entering at label N runs
   N of them in total; the last one sits in the rts delay slot.  */
16f1dae0 163GLOBAL(ashiftrt_r4_15):
a85a2db0 164 shar r4
16f1dae0 165GLOBAL(ashiftrt_r4_14):
a85a2db0 166 shar r4
16f1dae0 167GLOBAL(ashiftrt_r4_13):
a85a2db0 168 shar r4
16f1dae0 169GLOBAL(ashiftrt_r4_12):
a85a2db0 170 shar r4
16f1dae0 171GLOBAL(ashiftrt_r4_11):
a85a2db0 172 shar r4
16f1dae0 173GLOBAL(ashiftrt_r4_10):
a85a2db0 174 shar r4
16f1dae0 175GLOBAL(ashiftrt_r4_9):
a85a2db0 176 shar r4
16f1dae0 177GLOBAL(ashiftrt_r4_8):
a85a2db0 178 shar r4
16f1dae0 179GLOBAL(ashiftrt_r4_7):
a85a2db0 180 shar r4
16f1dae0 181GLOBAL(ashiftrt_r4_6):
a85a2db0 182 shar r4
16f1dae0 183GLOBAL(ashiftrt_r4_5):
a85a2db0 184 shar r4
16f1dae0 185GLOBAL(ashiftrt_r4_4):
a85a2db0 186 shar r4
16f1dae0 187GLOBAL(ashiftrt_r4_3):
a85a2db0 188 shar r4
16f1dae0 189GLOBAL(ashiftrt_r4_2):
a85a2db0 190 shar r4
16f1dae0 191GLOBAL(ashiftrt_r4_1):
a85a2db0 192 rts
193 shar r4
194
/* N = 0: return with r4 unchanged.  */
16f1dae0 195GLOBAL(ashiftrt_r4_0):
a85a2db0 196 rts
627d08ca 197 nop
805e22b2 198
199 ENDFUNC(GLOBAL(ashiftrt_r4_0))
200 ENDFUNC(GLOBAL(ashiftrt_r4_1))
201 ENDFUNC(GLOBAL(ashiftrt_r4_2))
202 ENDFUNC(GLOBAL(ashiftrt_r4_3))
203 ENDFUNC(GLOBAL(ashiftrt_r4_4))
204 ENDFUNC(GLOBAL(ashiftrt_r4_5))
205 ENDFUNC(GLOBAL(ashiftrt_r4_6))
206 ENDFUNC(GLOBAL(ashiftrt_r4_7))
207 ENDFUNC(GLOBAL(ashiftrt_r4_8))
208 ENDFUNC(GLOBAL(ashiftrt_r4_9))
209 ENDFUNC(GLOBAL(ashiftrt_r4_10))
210 ENDFUNC(GLOBAL(ashiftrt_r4_11))
211 ENDFUNC(GLOBAL(ashiftrt_r4_12))
212 ENDFUNC(GLOBAL(ashiftrt_r4_13))
213 ENDFUNC(GLOBAL(ashiftrt_r4_14))
214 ENDFUNC(GLOBAL(ashiftrt_r4_15))
215 ENDFUNC(GLOBAL(ashiftrt_r4_16))
216 ENDFUNC(GLOBAL(ashiftrt_r4_17))
217 ENDFUNC(GLOBAL(ashiftrt_r4_18))
218 ENDFUNC(GLOBAL(ashiftrt_r4_19))
219 ENDFUNC(GLOBAL(ashiftrt_r4_20))
220 ENDFUNC(GLOBAL(ashiftrt_r4_21))
221 ENDFUNC(GLOBAL(ashiftrt_r4_22))
222 ENDFUNC(GLOBAL(ashiftrt_r4_23))
223 ENDFUNC(GLOBAL(ashiftrt_r4_24))
224 ENDFUNC(GLOBAL(ashiftrt_r4_25))
225 ENDFUNC(GLOBAL(ashiftrt_r4_26))
226 ENDFUNC(GLOBAL(ashiftrt_r4_27))
227 ENDFUNC(GLOBAL(ashiftrt_r4_28))
228 ENDFUNC(GLOBAL(ashiftrt_r4_29))
229 ENDFUNC(GLOBAL(ashiftrt_r4_30))
230 ENDFUNC(GLOBAL(ashiftrt_r4_31))
231 ENDFUNC(GLOBAL(ashiftrt_r4_32))
a85a2db0 232#endif
233
30b0bb86 234#ifdef L_ashiftrt_n
235
236!
16f1dae0 237! GLOBAL(ashrsi3)
30b0bb86 238!
239! Entry:
240!
241! r4: Value to shift
6e7c6395 242! r5: Shift count
30b0bb86 243!
244! Exit:
245!
246! r0: Result
247!
248! Destroys:
249!
6e7c6395 250! T bit, r5
30b0bb86 251!
252
/* ashrsi3: arithmetic right shift of r4 by the variable count in r5,
   result in r0.  The count is reduced to 0..31 and used to index a table
   of byte offsets; control then jumps to the handler for that count.  */
16f1dae0 253 .global GLOBAL(ashrsi3)
59312820 254 HIDDEN_FUNC(GLOBAL(ashrsi3))
30b0bb86 255 .align 2
16f1dae0 256GLOBAL(ashrsi3):
/* r5 &= 31, then r5 = table[r5] (offset of the handler from the table).  */
30b0bb86 257 mov #31,r0
d73f1571 258 and r0,r5
259 mova LOCAL(ashrsi3_table),r0
0f04c0de 260 mov.b @(r0,r5),r5
/* SH1 path: form the absolute handler address (table + offset) and jmp.
   Other CPUs use the PC-relative braf.
   NOTE(review): braf is relative to the address just past its delay slot,
   so that path appears to rely on the table starting right there --
   confirm before changing the instruction sequence or alignment.  */
d73f1571 261#ifdef __sh1__
262 add r5,r0
30b0bb86 263 jmp @r0
d73f1571 264#else
265 braf r5
266#endif
/* Branch delay slot: copy the value to shift into the result register.  */
30b0bb86 267 mov r4,r0
268
/* One byte per shift count: distance from the table to that count's
   handler.  mov.b sign-extends what it loads, so every handler has to stay
   within 127 bytes of the table.  */
d73f1571 269 .align 2
270LOCAL(ashrsi3_table):
271 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
303
/* Count 31: rotcl puts the sign bit of r0 into T; subc then yields
   r0 - r0 - T, i.e. 0 or -1.  */
304LOCAL(ashrsi3_31):
30b0bb86 305 rotcl r0
306 rts
307 subc r0,r0
308
/* Counts 25..30: (count - 24) shar steps, then the shift-by-24 tail
   (shlr16 + shlr8, with exts.b in the rts delay slot).  */
d73f1571 309LOCAL(ashrsi3_30):
30b0bb86 310 shar r0
d73f1571 311LOCAL(ashrsi3_29):
30b0bb86 312 shar r0
d73f1571 313LOCAL(ashrsi3_28):
30b0bb86 314 shar r0
d73f1571 315LOCAL(ashrsi3_27):
30b0bb86 316 shar r0
d73f1571 317LOCAL(ashrsi3_26):
30b0bb86 318 shar r0
d73f1571 319LOCAL(ashrsi3_25):
30b0bb86 320 shar r0
d73f1571 321LOCAL(ashrsi3_24):
30b0bb86 322 shlr16 r0
323 shlr8 r0
324 rts
325 exts.b r0,r0
326
/* Counts 17..23: (count - 16) shar steps, then the shift-by-16 tail
   (shlr16, with exts.w in the rts delay slot).  */
d73f1571 327LOCAL(ashrsi3_23):
30b0bb86 328 shar r0
d73f1571 329LOCAL(ashrsi3_22):
30b0bb86 330 shar r0
d73f1571 331LOCAL(ashrsi3_21):
30b0bb86 332 shar r0
d73f1571 333LOCAL(ashrsi3_20):
30b0bb86 334 shar r0
d73f1571 335LOCAL(ashrsi3_19):
30b0bb86 336 shar r0
d73f1571 337LOCAL(ashrsi3_18):
30b0bb86 338 shar r0
d73f1571 339LOCAL(ashrsi3_17):
30b0bb86 340 shar r0
d73f1571 341LOCAL(ashrsi3_16):
30b0bb86 342 shlr16 r0
343 rts
344 exts.w r0,r0
345
/* Counts 1..15: entering at label k runs k single-bit shar steps in total,
   the last of them in the rts delay slot.  */
d73f1571 346LOCAL(ashrsi3_15):
30b0bb86 347 shar r0
d73f1571 348LOCAL(ashrsi3_14):
30b0bb86 349 shar r0
d73f1571 350LOCAL(ashrsi3_13):
30b0bb86 351 shar r0
d73f1571 352LOCAL(ashrsi3_12):
30b0bb86 353 shar r0
d73f1571 354LOCAL(ashrsi3_11):
30b0bb86 355 shar r0
d73f1571 356LOCAL(ashrsi3_10):
30b0bb86 357 shar r0
d73f1571 358LOCAL(ashrsi3_9):
30b0bb86 359 shar r0
d73f1571 360LOCAL(ashrsi3_8):
30b0bb86 361 shar r0
d73f1571 362LOCAL(ashrsi3_7):
30b0bb86 363 shar r0
d73f1571 364LOCAL(ashrsi3_6):
30b0bb86 365 shar r0
d73f1571 366LOCAL(ashrsi3_5):
30b0bb86 367 shar r0
d73f1571 368LOCAL(ashrsi3_4):
30b0bb86 369 shar r0
d73f1571 370LOCAL(ashrsi3_3):
30b0bb86 371 shar r0
d73f1571 372LOCAL(ashrsi3_2):
30b0bb86 373 shar r0
d73f1571 374LOCAL(ashrsi3_1):
30b0bb86 375 rts
376 shar r0
377
/* Count 0: r0 already holds the unshifted value.  */
d73f1571 378LOCAL(ashrsi3_0):
30b0bb86 379 rts
380 nop
381
805e22b2 382 ENDFUNC(GLOBAL(ashrsi3))
30b0bb86 383#endif
384
385#ifdef L_ashiftlt
386
387!
16f1dae0 388! GLOBAL(ashlsi3)
6e7c6395 389! (For compatibility with older binaries, not used by compiler)
30b0bb86 390!
391! Entry:
6e7c6395 392! r4: Value to shift
393! r5: Shift count
30b0bb86 394!
395! Exit:
6e7c6395 396! r0: Result
30b0bb86 397!
398! Destroys:
6e7c6395 399! T bit
400!
30b0bb86 401!
6e7c6395 402! GLOBAL(ashlsi3_r0)
30b0bb86 403!
6e7c6395 404! Entry:
405! r4: Value to shift
406! r0: Shift count
407!
408! Exit:
409! r0: Result
410!
411! Destroys:
412! T bit
413
/* ashlsi3 / ashlsi3_r0: left shift of r4 by a variable count, result in
   r0.  ashlsi3 takes the count in r5 and copies it to r0 before falling
   into ashlsi3_r0, which takes the count in r0.  The count is masked to
   0..31 and multiplied by 4 to index a table of code: each table entry is
   two 2-byte instructions (a branch or rts plus its delay slot); only the
   final entry, for count 31, is longer.  */
16f1dae0 414 .global GLOBAL(ashlsi3)
6e7c6395 415 .global GLOBAL(ashlsi3_r0)
59312820 416 HIDDEN_FUNC(GLOBAL(ashlsi3))
6e7c6395 417 HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
16f1dae0 418GLOBAL(ashlsi3):
6e7c6395 419 mov r5,r0
420 .align 2
421GLOBAL(ashlsi3_r0):
422
/* SH1 path: r4 is spilled to the stack and borrowed to hold the table
   offset while mova loads the table address into r0; r4 is reloaded before
   the jmp, and the value is copied into r0 in the jmp delay slot.  */
d73f1571 423#ifdef __sh1__
6e7c6395 424 and #31,r0
425 shll2 r0
426 mov.l r4,@-r15
427 mov r0,r4
428 mova LOCAL(ashlsi3_table),r0
429 add r4,r0
430 mov.l @r15+,r4
30b0bb86 431 jmp @r0
6e7c6395 432 mov r4,r0
433 .align 2
/* Other CPUs: PC-relative braf by (count * 4); the value is copied into r0
   in the delay slot.
   NOTE(review): this appears to rely on the table starting immediately
   after the delay slot -- confirm before inserting anything here.  */
d73f1571 434#else
6e7c6395 435 and #31,r0
436 shll2 r0
437 braf r0
30b0bb86 438 mov r4,r0
6e7c6395 439#endif
30b0bb86 440
/* Table of entries, in count order.  The instruction after each bra/rts is
   its delay slot and performs part of the shift; the branch target then
   finishes the job.  */
d73f1571 441LOCAL(ashlsi3_table):
6e7c6395 442 rts // << 0
443 nop
444LOCAL(ashlsi_1):
445 rts // << 1
446 shll r0
447LOCAL(ashlsi_2): // << 2
30b0bb86 448 rts
449 shll2 r0
6e7c6395 450 bra LOCAL(ashlsi_1) // << 3
30b0bb86 451 shll2 r0
6e7c6395 452 bra LOCAL(ashlsi_2) // << 4
30b0bb86 453 shll2 r0
6e7c6395 454 bra LOCAL(ashlsi_5) // << 5
30b0bb86 455 shll r0
6e7c6395 456 bra LOCAL(ashlsi_6) // << 6
30b0bb86 457 shll2 r0
6e7c6395 458 bra LOCAL(ashlsi_7) // << 7
459 shll r0
460LOCAL(ashlsi_8): // << 8
30b0bb86 461 rts
462 shll8 r0
6e7c6395 463 bra LOCAL(ashlsi_8) // << 9
464 shll r0
465 bra LOCAL(ashlsi_8) // << 10
30b0bb86 466 shll2 r0
6e7c6395 467 bra LOCAL(ashlsi_11) // << 11
468 shll r0
469 bra LOCAL(ashlsi_12) // << 12
30b0bb86 470 shll2 r0
6e7c6395 471 bra LOCAL(ashlsi_13) // << 13
472 shll r0
473 bra LOCAL(ashlsi_14) // << 14
30b0bb86 474 shll8 r0
6e7c6395 475 bra LOCAL(ashlsi_15) // << 15
476 shll8 r0
477LOCAL(ashlsi_16): // << 16
30b0bb86 478 rts
6e7c6395 479 shll16 r0
480 bra LOCAL(ashlsi_16) // << 17
30b0bb86 481 shll r0
6e7c6395 482 bra LOCAL(ashlsi_16) // << 18
30b0bb86 483 shll2 r0
6e7c6395 484 bra LOCAL(ashlsi_19) // << 19
485 shll r0
486 bra LOCAL(ashlsi_20) // << 20
30b0bb86 487 shll2 r0
6e7c6395 488 bra LOCAL(ashlsi_21) // << 21
489 shll r0
490 bra LOCAL(ashlsi_22) // << 22
30b0bb86 491 shll16 r0
6e7c6395 492 bra LOCAL(ashlsi_23) // << 23
493 shll16 r0
494 bra LOCAL(ashlsi_16) // << 24
495 shll8 r0
496 bra LOCAL(ashlsi_25) // << 25
497 shll r0
498 bra LOCAL(ashlsi_26) // << 26
30b0bb86 499 shll2 r0
6e7c6395 500 bra LOCAL(ashlsi_27) // << 27
501 shll r0
502 bra LOCAL(ashlsi_28) // << 28
30b0bb86 503 shll2 r0
6e7c6395 504 bra LOCAL(ashlsi_29) // << 29
505 shll16 r0
506 bra LOCAL(ashlsi_30) // << 30
30b0bb86 507 shll16 r0
/* Count 31: keep only bit 0, then rotr moves it into bit 31.  */
6e7c6395 508 and #1,r0 // << 31
30b0bb86 509 rts
6e7c6395 510 rotr r0
30b0bb86 511
/* Shared tails.  The first three rts instructions below have no dedicated
   delay-slot instruction: each one's delay slot is the shll2 that begins
   the next tail, so that shll2 counts towards the total of the tail that
   returns (e.g. << 5 = shll + shll2 + shll2).  */
6e7c6395 512LOCAL(ashlsi_7):
30b0bb86 513 shll2 r0
6e7c6395 514LOCAL(ashlsi_5):
515LOCAL(ashlsi_6):
30b0bb86 516 shll2 r0
30b0bb86 517 rts /* delay slot: the shll2 at ashlsi_13 */
6e7c6395 518LOCAL(ashlsi_13):
30b0bb86 519 shll2 r0
6e7c6395 520LOCAL(ashlsi_12):
521LOCAL(ashlsi_11):
522 shll8 r0
523 rts /* delay slot: the shll2 at ashlsi_21 */
524LOCAL(ashlsi_21):
30b0bb86 525 shll2 r0
6e7c6395 526LOCAL(ashlsi_20):
527LOCAL(ashlsi_19):
528 shll16 r0
529 rts /* delay slot: the shll2 at ashlsi_28 / ashlsi_27 */
530LOCAL(ashlsi_28):
531LOCAL(ashlsi_27):
30b0bb86 532 shll2 r0
6e7c6395 533LOCAL(ashlsi_26):
534LOCAL(ashlsi_25):
30b0bb86 535 shll16 r0
6e7c6395 536 rts
30b0bb86 537 shll8 r0
6e7c6395 538
/* The remaining tails overshoot in the table entry and step back with a
   logical right shift: << 14 = shll8, shlr2, shll8; << 22 = shll16, shlr2,
   shll8.  */
539LOCAL(ashlsi_22):
540LOCAL(ashlsi_14):
541 shlr2 r0
30b0bb86 542 rts
6e7c6395 543 shll8 r0
30b0bb86 544
/* << 15 = shll8, shlr, shll8; << 23 = shll16, shlr, shll8.  */
6e7c6395 545LOCAL(ashlsi_23):
546LOCAL(ashlsi_15):
547 shlr r0
30b0bb86 548 rts
6e7c6395 549 shll8 r0
550
/* << 29 = shll16, shlr, shlr2, shll16; << 30 = shll16, shlr2, shll16.  */
551LOCAL(ashlsi_29):
552 shlr r0
553LOCAL(ashlsi_30):
554 shlr2 r0
555 rts
556 shll16 r0
30b0bb86 557
805e22b2 558 ENDFUNC(GLOBAL(ashlsi3))
6e7c6395 559 ENDFUNC(GLOBAL(ashlsi3_r0))
30b0bb86 560#endif
561
562#ifdef L_lshiftrt
563
564!
16f1dae0 565! GLOBAL(lshrsi3)
6e7c6395 566! (For compatibility with older binaries, not used by compiler)
30b0bb86 567!
568! Entry:
6e7c6395 569! r4: Value to shift
570! r5: Shift count
30b0bb86 571!
572! Exit:
6e7c6395 573! r0: Result
30b0bb86 574!
575! Destroys:
6e7c6395 576! T bit
577!
578!
579! GLOBAL(lshrsi3_r0)
30b0bb86 580!
6e7c6395 581! Entry:
582! r4: Value to shift
583! r0: Shift count
584!
585! Exit:
586! r0: Result
30b0bb86 587!
6e7c6395 588! Destroys:
589! T bit
590
/* lshrsi3 / lshrsi3_r0: logical right shift of r4 by a variable count,
   result in r0.  lshrsi3 takes the count in r5 and copies it to r0 before
   falling into lshrsi3_r0, which takes the count in r0.  The count is
   masked to 0..31 and multiplied by 4 to index a table of code: each table
   entry is two 2-byte instructions (a branch or rts plus its delay slot);
   only the final entry, for count 31, is longer.  */
16f1dae0 591 .global GLOBAL(lshrsi3)
6e7c6395 592 .global GLOBAL(lshrsi3_r0)
59312820 593 HIDDEN_FUNC(GLOBAL(lshrsi3))
6e7c6395 594 HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
16f1dae0 595GLOBAL(lshrsi3):
6e7c6395 596 mov r5,r0
597 .align 2
598GLOBAL(lshrsi3_r0):
599
/* SH1 path: r4 is spilled to the stack and borrowed to hold the table
   offset while mova loads the table address into r0; r4 is reloaded before
   the jmp, and the value is copied into r0 in the jmp delay slot.  */
d73f1571 600#ifdef __sh1__
6e7c6395 601 and #31,r0
602 shll2 r0
603 mov.l r4,@-r15
604 mov r0,r4
605 mova LOCAL(lshrsi3_table),r0
606 add r4,r0
607 mov.l @r15+,r4
30b0bb86 608 jmp @r0
6e7c6395 609 mov r4,r0
610 .align 2
/* Other CPUs: PC-relative braf by (count * 4); the value is copied into r0
   in the delay slot.
   NOTE(review): this appears to rely on the table starting immediately
   after the delay slot -- confirm before inserting anything here.  */
d73f1571 611#else
6e7c6395 612 and #31,r0
613 shll2 r0
614 braf r0
30b0bb86 615 mov r4,r0
6e7c6395 616#endif
/* Table of entries, in count order.  The instruction after each bra/rts is
   its delay slot and performs part of the shift; the branch target then
   finishes the job.  */
d73f1571 617LOCAL(lshrsi3_table):
6e7c6395 618 rts // >> 0
619 nop
620LOCAL(lshrsi_1): // >> 1
621 rts
622 shlr r0
623LOCAL(lshrsi_2): // >> 2
30b0bb86 624 rts
625 shlr2 r0
6e7c6395 626 bra LOCAL(lshrsi_1) // >> 3
30b0bb86 627 shlr2 r0
6e7c6395 628 bra LOCAL(lshrsi_2) // >> 4
30b0bb86 629 shlr2 r0
6e7c6395 630 bra LOCAL(lshrsi_5) // >> 5
30b0bb86 631 shlr r0
6e7c6395 632 bra LOCAL(lshrsi_6) // >> 6
30b0bb86 633 shlr2 r0
6e7c6395 634 bra LOCAL(lshrsi_7) // >> 7
635 shlr r0
636LOCAL(lshrsi_8): // >> 8
30b0bb86 637 rts
638 shlr8 r0
6e7c6395 639 bra LOCAL(lshrsi_8) // >> 9
640 shlr r0
641 bra LOCAL(lshrsi_8) // >> 10
30b0bb86 642 shlr2 r0
6e7c6395 643 bra LOCAL(lshrsi_11) // >> 11
644 shlr r0
645 bra LOCAL(lshrsi_12) // >> 12
30b0bb86 646 shlr2 r0
6e7c6395 647 bra LOCAL(lshrsi_13) // >> 13
648 shlr r0
649 bra LOCAL(lshrsi_14) // >> 14
650 shlr8 r0
651 bra LOCAL(lshrsi_15) // >> 15
30b0bb86 652 shlr8 r0
6e7c6395 653LOCAL(lshrsi_16): // >> 16
30b0bb86 654 rts
6e7c6395 655 shlr16 r0
656 bra LOCAL(lshrsi_16) // >> 17
30b0bb86 657 shlr r0
6e7c6395 658 bra LOCAL(lshrsi_16) // >> 18
30b0bb86 659 shlr2 r0
6e7c6395 660 bra LOCAL(lshrsi_19) // >> 19
661 shlr r0
662 bra LOCAL(lshrsi_20) // >> 20
30b0bb86 663 shlr2 r0
6e7c6395 664 bra LOCAL(lshrsi_21) // >> 21
665 shlr r0
666 bra LOCAL(lshrsi_22) // >> 22
30b0bb86 667 shlr16 r0
6e7c6395 668 bra LOCAL(lshrsi_23) // >> 23
669 shlr16 r0
670 bra LOCAL(lshrsi_16) // >> 24
671 shlr8 r0
672 bra LOCAL(lshrsi_25) // >> 25
673 shlr r0
674 bra LOCAL(lshrsi_26) // >> 26
30b0bb86 675 shlr2 r0
6e7c6395 676 bra LOCAL(lshrsi_27) // >> 27
677 shlr r0
678 bra LOCAL(lshrsi_28) // >> 28
30b0bb86 679 shlr2 r0
6e7c6395 680 bra LOCAL(lshrsi_29) // >> 29
681 shlr16 r0
682 bra LOCAL(lshrsi_30) // >> 30
30b0bb86 683 shlr16 r0
/* Count 31: shll moves bit 31 into T; movt then sets r0 to T (0 or 1).  */
6e7c6395 684 shll r0 // >> 31
30b0bb86 685 rts
6e7c6395 686 movt r0
30b0bb86 687
/* Shared tails.  The first three rts instructions below have no dedicated
   delay-slot instruction: each one's delay slot is the shlr2 that begins
   the next tail, so that shlr2 counts towards the total of the tail that
   returns (e.g. >> 5 = shlr + shlr2 + shlr2).  */
6e7c6395 688LOCAL(lshrsi_7):
30b0bb86 689 shlr2 r0
6e7c6395 690LOCAL(lshrsi_5):
691LOCAL(lshrsi_6):
30b0bb86 692 shlr2 r0
30b0bb86 693 rts /* delay slot: the shlr2 at lshrsi_13 */
6e7c6395 694LOCAL(lshrsi_13):
30b0bb86 695 shlr2 r0
6e7c6395 696LOCAL(lshrsi_12):
697LOCAL(lshrsi_11):
698 shlr8 r0
699 rts /* delay slot: the shlr2 at lshrsi_21 */
700LOCAL(lshrsi_21):
30b0bb86 701 shlr2 r0
6e7c6395 702LOCAL(lshrsi_20):
703LOCAL(lshrsi_19):
704 shlr16 r0
705 rts /* delay slot: the shlr2 at lshrsi_28 / lshrsi_27 */
706LOCAL(lshrsi_28):
707LOCAL(lshrsi_27):
30b0bb86 708 shlr2 r0
6e7c6395 709LOCAL(lshrsi_26):
710LOCAL(lshrsi_25):
30b0bb86 711 shlr16 r0
6e7c6395 712 rts
30b0bb86 713 shlr8 r0
6e7c6395 714
/* The remaining tails overshoot in the table entry and step back with a
   left shift: >> 14 = shlr8, shll2, shlr8; >> 22 = shlr16, shll2, shlr8.  */
715LOCAL(lshrsi_22):
716LOCAL(lshrsi_14):
717 shll2 r0
30b0bb86 718 rts
6e7c6395 719 shlr8 r0
30b0bb86 720
/* >> 15 = shlr8, shll, shlr8; >> 23 = shlr16, shll, shlr8.  */
6e7c6395 721LOCAL(lshrsi_23):
722LOCAL(lshrsi_15):
723 shll r0
30b0bb86 724 rts
6e7c6395 725 shlr8 r0
726
/* >> 29 = shlr16, shll, shll2, shlr16; >> 30 = shlr16, shll2, shlr16.  */
727LOCAL(lshrsi_29):
728 shll r0
729LOCAL(lshrsi_30):
730 shll2 r0
731 rts
732 shlr16 r0
30b0bb86 733
805e22b2 734 ENDFUNC(GLOBAL(lshrsi3))
6e7c6395 735 ENDFUNC(GLOBAL(lshrsi3_r0))
30b0bb86 736#endif
737
008c057d 738#ifdef L_movmem
/* movmem: copies longwords from the address in r5 to the address in r4,
   64 bytes per loop iteration plus a remainder, driven by the count in r6
   (scaled by 4 on entry with shll2).  r0 is the copy temporary.  The
   caller's pr is saved on the stack: the routine enters the movmemSI*
   chain once with bsr and afterwards with plain branches, so the chain's
   closing rts keeps returning to movmem_loop until pr is restored at
   movmem_done.
   NOTE(review): how the compiler encodes the length in r6 is not visible
   in this file -- see the existing remark below about "byte count minus
   64" and confirm against the caller.  */
a85a2db0 739 .text
59312820 740 .balign 4
741 .global GLOBAL(movmem)
742 HIDDEN_FUNC(GLOBAL(movmem))
743 HIDDEN_ALIAS(movstr,movmem)
744 /* This would be a lot simpler if r6 contained the byte count
745 minus 64, and we wouldn't be called here for a byte count of 64. */
746GLOBAL(movmem):
747 sts.l pr,@-r15
748 shll2 r6
/* Enter the chain one instruction past movmemSI52; the load that entry
   starts with is done in the bsr delay slot instead.  Offsets 48..0 are
   copied and the chain's rts comes back to movmem_loop.  */
749 bsr GLOBAL(movmemSI52+2)
750 mov.l @(48,r5),r0
751 .balign 4
/* Copy offsets 60, 56 and 52, interleaved with the loop bookkeeping:
   r6 -= 64; if r6 == 0 finish at movmem_done; if r6 > 0 advance r4 and r5
   by 64 and branch back into the chain at movmemSI52.  */
752LOCAL(movmem_loop): /* Reached with rts */
753 mov.l @(60,r5),r0
754 add #-64,r6
755 mov.l r0,@(60,r4)
756 tst r6,r6
757 mov.l @(56,r5),r0
758 bt LOCAL(movmem_done)
759 mov.l r0,@(56,r4)
760 cmp/pl r6
761 mov.l @(52,r5),r0
762 add #64,r5
763 mov.l r0,@(52,r4)
764 add #64,r4
765 bt GLOBAL(movmemSI52)
/* r6 is negative here.  Each chain entry is two 2-byte instructions, so
   movmemSI4 + 4 + r6 is the address of entry movmemSI(-r6).  The lds.l at
   movmem_done is also the jmp delay slot: it restores the caller's pr, so
   the chain's rts returns to the caller.  */
a85a2db0 766! done all the large groups, do the remainder
008c057d 767! jump to movmem+
59312820 768 mova GLOBAL(movmemSI4)+4,r0
a85a2db0 769 add r6,r0
770 jmp @r0
59312820 771LOCAL(movmem_done): ! share slot insn, works out aligned.
772 lds.l @r15+,pr
/* Reached by the bt above with offset 56 loaded but not yet stored and
   r4/r5 not advanced: store it, copy offset 52 and return.  */
773 mov.l r0,@(56,r4)
774 mov.l @(52,r5),r0
775 rts
776 mov.l r0,@(52,r4)
777 .balign 4
94ccf9fb 778! ??? We need aliases movstr* for movmem* for the older libraries. These
779! aliases will be removed at some point in the future.
/* movmemSI64 .. movmemSI4: fall-through chain of entry points.
   movmemSI<n> copies the longwords at offsets n-4, n-8, ..., 0 from the
   address in r5 to the address in r4, using r0 as the temporary, and
   returns through the rts at movmemSI4 (whose delay slot does the last
   store).  r4 and r5 are not modified.  Every entry except the last is
   exactly one load plus one store; movmem's computed jump into this chain
   depends on that layout, so do not insert instructions between entries.
   Each entry also gets a movstrSI<n> alias via HIDDEN_ALIAS.  */
008c057d 780 .global GLOBAL(movmemSI64)
59312820 781 HIDDEN_FUNC(GLOBAL(movmemSI64))
782 HIDDEN_ALIAS(movstrSI64,movmemSI64)
008c057d 783GLOBAL(movmemSI64):
a85a2db0 784 mov.l @(60,r5),r0
785 mov.l r0,@(60,r4)
008c057d 786 .global GLOBAL(movmemSI60)
59312820 787 HIDDEN_FUNC(GLOBAL(movmemSI60))
788 HIDDEN_ALIAS(movstrSI60,movmemSI60)
008c057d 789GLOBAL(movmemSI60):
a85a2db0 790 mov.l @(56,r5),r0
791 mov.l r0,@(56,r4)
008c057d 792 .global GLOBAL(movmemSI56)
59312820 793 HIDDEN_FUNC(GLOBAL(movmemSI56))
794 HIDDEN_ALIAS(movstrSI56,movmemSI56)
008c057d 795GLOBAL(movmemSI56):
a85a2db0 796 mov.l @(52,r5),r0
797 mov.l r0,@(52,r4)
008c057d 798 .global GLOBAL(movmemSI52)
59312820 799 HIDDEN_FUNC(GLOBAL(movmemSI52))
800 HIDDEN_ALIAS(movstrSI52,movmemSI52)
/* movmem enters here (and at movmemSI52+2) for each 64-byte group.  */
008c057d 801GLOBAL(movmemSI52):
a85a2db0 802 mov.l @(48,r5),r0
803 mov.l r0,@(48,r4)
008c057d 804 .global GLOBAL(movmemSI48)
59312820 805 HIDDEN_FUNC(GLOBAL(movmemSI48))
806 HIDDEN_ALIAS(movstrSI48,movmemSI48)
008c057d 807GLOBAL(movmemSI48):
a85a2db0 808 mov.l @(44,r5),r0
809 mov.l r0,@(44,r4)
008c057d 810 .global GLOBAL(movmemSI44)
59312820 811 HIDDEN_FUNC(GLOBAL(movmemSI44))
812 HIDDEN_ALIAS(movstrSI44,movmemSI44)
008c057d 813GLOBAL(movmemSI44):
a85a2db0 814 mov.l @(40,r5),r0
815 mov.l r0,@(40,r4)
008c057d 816 .global GLOBAL(movmemSI40)
59312820 817 HIDDEN_FUNC(GLOBAL(movmemSI40))
818 HIDDEN_ALIAS(movstrSI40,movmemSI40)
008c057d 819GLOBAL(movmemSI40):
a85a2db0 820 mov.l @(36,r5),r0
821 mov.l r0,@(36,r4)
008c057d 822 .global GLOBAL(movmemSI36)
59312820 823 HIDDEN_FUNC(GLOBAL(movmemSI36))
824 HIDDEN_ALIAS(movstrSI36,movmemSI36)
008c057d 825GLOBAL(movmemSI36):
a85a2db0 826 mov.l @(32,r5),r0
827 mov.l r0,@(32,r4)
008c057d 828 .global GLOBAL(movmemSI32)
59312820 829 HIDDEN_FUNC(GLOBAL(movmemSI32))
830 HIDDEN_ALIAS(movstrSI32,movmemSI32)
008c057d 831GLOBAL(movmemSI32):
a85a2db0 832 mov.l @(28,r5),r0
833 mov.l r0,@(28,r4)
008c057d 834 .global GLOBAL(movmemSI28)
59312820 835 HIDDEN_FUNC(GLOBAL(movmemSI28))
836 HIDDEN_ALIAS(movstrSI28,movmemSI28)
008c057d 837GLOBAL(movmemSI28):
a85a2db0 838 mov.l @(24,r5),r0
839 mov.l r0,@(24,r4)
008c057d 840 .global GLOBAL(movmemSI24)
59312820 841 HIDDEN_FUNC(GLOBAL(movmemSI24))
842 HIDDEN_ALIAS(movstrSI24,movmemSI24)
008c057d 843GLOBAL(movmemSI24):
a85a2db0 844 mov.l @(20,r5),r0
845 mov.l r0,@(20,r4)
008c057d 846 .global GLOBAL(movmemSI20)
59312820 847 HIDDEN_FUNC(GLOBAL(movmemSI20))
848 HIDDEN_ALIAS(movstrSI20,movmemSI20)
008c057d 849GLOBAL(movmemSI20):
a85a2db0 850 mov.l @(16,r5),r0
851 mov.l r0,@(16,r4)
008c057d 852 .global GLOBAL(movmemSI16)
59312820 853 HIDDEN_FUNC(GLOBAL(movmemSI16))
854 HIDDEN_ALIAS(movstrSI16,movmemSI16)
008c057d 855GLOBAL(movmemSI16):
a85a2db0 856 mov.l @(12,r5),r0
857 mov.l r0,@(12,r4)
008c057d 858 .global GLOBAL(movmemSI12)
59312820 859 HIDDEN_FUNC(GLOBAL(movmemSI12))
860 HIDDEN_ALIAS(movstrSI12,movmemSI12)
008c057d 861GLOBAL(movmemSI12):
a85a2db0 862 mov.l @(8,r5),r0
863 mov.l r0,@(8,r4)
008c057d 864 .global GLOBAL(movmemSI8)
59312820 865 HIDDEN_FUNC(GLOBAL(movmemSI8))
866 HIDDEN_ALIAS(movstrSI8,movmemSI8)
008c057d 867GLOBAL(movmemSI8):
a85a2db0 868 mov.l @(4,r5),r0
869 mov.l r0,@(4,r4)
008c057d 870 .global GLOBAL(movmemSI4)
59312820 871 HIDDEN_FUNC(GLOBAL(movmemSI4))
872 HIDDEN_ALIAS(movstrSI4,movmemSI4)
/* Last entry: the final store sits in the rts delay slot.  */
008c057d 873GLOBAL(movmemSI4):
a85a2db0 874 mov.l @(0,r5),r0
a85a2db0 875 rts
59312820 876 mov.l r0,@(0,r4)
a85a2db0 877
008c057d 878 ENDFUNC(GLOBAL(movmemSI64))
879 ENDFUNC(GLOBAL(movmemSI60))
880 ENDFUNC(GLOBAL(movmemSI56))
881 ENDFUNC(GLOBAL(movmemSI52))
882 ENDFUNC(GLOBAL(movmemSI48))
883 ENDFUNC(GLOBAL(movmemSI44))
884 ENDFUNC(GLOBAL(movmemSI40))
885 ENDFUNC(GLOBAL(movmemSI36))
886 ENDFUNC(GLOBAL(movmemSI32))
887 ENDFUNC(GLOBAL(movmemSI28))
888 ENDFUNC(GLOBAL(movmemSI24))
889 ENDFUNC(GLOBAL(movmemSI20))
890 ENDFUNC(GLOBAL(movmemSI16))
891 ENDFUNC(GLOBAL(movmemSI12))
892 ENDFUNC(GLOBAL(movmemSI8))
893 ENDFUNC(GLOBAL(movmemSI4))
59312820 894 ENDFUNC(GLOBAL(movmem))
a85a2db0 895#endif
896
008c057d 897#ifdef L_movmem_i4
/* movmem_i4_even / movmem_i4_odd / movmemSI12_i4: longword block copies
   from the address in r5 (post-incremented) to the address in r4.
   The even/odd routines share one software-pipelined loop that loads into
   r0..r3 and stores 16 bytes per pass; r6 is the loop counter consumed by
   dt (decrement, T set when it reaches zero).  r0-r3, r4, r5, r6 and T are
   modified.
   NOTE(review): the exact meaning of the initial r6 value (and of "even"
   versus "odd") is set by the compiler's expander, which is not visible
   here -- confirm against the caller.  */
1b61190c 898 .text
008c057d 899 .global GLOBAL(movmem_i4_even)
900 .global GLOBAL(movmem_i4_odd)
901 .global GLOBAL(movmemSI12_i4)
1b61190c 902
59312820 903 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
904 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
905 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
805e22b2 906
59312820 907 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
908 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
909 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
94ccf9fb 910
1b61190c 911 .p2align 5
/* Exit taken from L_movmem_loop when dt hits zero there: r0/r1 hold the
   last two longwords and r4 has not been advanced, hence offsets 16/20.  */
008c057d 912L_movmem_2mod4_end:
1b61190c 913 mov.l r0,@(16,r4)
914 rts
915 mov.l r1,@(20,r4)
916
917 .p2align 2
918
/* Even entry: preload r0/r1 (the second load in the bra delay slot) and
   join the loop at L_movmem_start_even.  */
008c057d 919GLOBAL(movmem_i4_even):
805e22b2 920 mov.l @r5+,r0
008c057d 921 bra L_movmem_start_even
805e22b2 922 mov.l @r5+,r1
923
/* Odd entry: bias r4 by -4 so the first three longwords land at the
   original destination offsets 0, 4 and 8 via @(4..12,r4); the third store
   is the first instruction of the loop.  */
008c057d 924GLOBAL(movmem_i4_odd):
1b61190c 925 mov.l @r5+,r1
926 add #-4,r4
927 mov.l @r5+,r2
928 mov.l @r5+,r3
929 mov.l r1,@(4,r4)
930 mov.l r2,@(8,r4)
931
/* First half of a pass: store r3, count down, load the next r0/r1 (r1 in
   the bt/s delay slot, so it is loaded whether or not the exit is taken).  */
008c057d 932L_movmem_loop:
1b61190c 933 mov.l r3,@(12,r4)
934 dt r6
935 mov.l @r5+,r0
008c057d 936 bt/s L_movmem_2mod4_end
1b61190c 937 mov.l @r5+,r1
938 add #16,r4
/* Second half: load r2/r3, store r0..r2 (r2 in the bf/s delay slot), count
   down; when the counter reaches zero, return and store r3 in the rts
   delay slot.  */
008c057d 939L_movmem_start_even:
1b61190c 940 mov.l @r5+,r2
941 mov.l @r5+,r3
942 mov.l r0,@r4
943 dt r6
944 mov.l r1,@(4,r4)
008c057d 945 bf/s L_movmem_loop
1b61190c 946 mov.l r2,@(8,r4)
947 rts
948 mov.l r3,@(12,r4)
949
008c057d 950 ENDFUNC(GLOBAL(movmem_i4_even))
951 ENDFUNC(GLOBAL(movmem_i4_odd))
1b61190c 952
953 .p2align 4
/* movmemSI12_i4: copy three longwords (12 bytes) from r5 to r4 through
   r0, r1 and r2; r4 and r5 are left unchanged.  */
008c057d 954GLOBAL(movmemSI12_i4):
1b61190c 955 mov.l @r5,r0
956 mov.l @(4,r5),r1
957 mov.l @(8,r5),r2
958 mov.l r0,@r4
959 mov.l r1,@(4,r4)
960 rts
961 mov.l r2,@(8,r4)
805e22b2 962
008c057d 963 ENDFUNC(GLOBAL(movmemSI12_i4))
1b61190c 964#endif
965
a85a2db0 966#ifdef L_mulsi3
967
968
16f1dae0 969 .global GLOBAL(mulsi3)
59312820 970 HIDDEN_FUNC(GLOBAL(mulsi3))
a85a2db0 971
972! r4 = aabb
973! r5 = ccdd
974! r0 = aabb*ccdd via partial products
975!
976! if aa == 0 and cc == 0
977! r0 = bb*dd
978!
979! else
980! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
981!
982
/* mulsi3: 32-bit multiply built from 16x16 mulu.w partial products.
   Entry:    r4 = aabb, r5 = ccdd (high and low 16-bit halves).
   Exit:     r0 = low 32 bits of r4 * r5.
   Destroys: r1, r2, r3, macl, T bit.
   Fast path: when both high halves are zero the single product bb*dd is
   the answer.  Otherwise r0 = bb*dd + ((aa*dd + cc*bb) << 16); the aa*cc
   term is not needed because it only affects bits above bit 31.  */
16f1dae0 983GLOBAL(mulsi3):
fb062b27 984 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
a85a2db0 985 mov r5,r3 ! r3 = ccdd
986 swap.w r4,r2 ! r2 = bbaa
987 xtrct r2,r3 ! r3 = aacc
988 tst r3,r3 ! msws zero ?
30b0bb86 989 bf hiset
b090827b 990 rts ! yes - then we have the answer
a85a2db0 991 sts macl,r0
992
/* Slow path: mulu.w only reads the low 16 bits of each operand, so r2
   (bbaa) supplies aa and r3 (aacc) supplies cc.  The final add runs in the
   rts delay slot.  */
993hiset: sts macl,r0 ! r0 = bb*dd
fb062b27 994 mulu.w r2,r5 ! brewing macl = aa*dd
a85a2db0 995 sts macl,r1
fb062b27 996 mulu.w r3,r4 ! brewing macl = cc*bb
30b0bb86 997 sts macl,r2
a85a2db0 998 add r1,r2
999 shll16 r2
1000 rts
1001 add r2,r0
30b0bb86 1002
59312820 1003 ENDFUNC(GLOBAL(mulsi3))
30b0bb86 1004#endif
87e19636 1005#endif /* ! __SH5__ */
fe9c9e23 1006
1007/*------------------------------------------------------------------------------
1008 32 bit signed integer division that uses FPU double precision division. */
1009
1b61190c 1010#ifdef L_sdivsi3_i4
a85a2db0 1011 .title "SH DIVIDE"
fe9c9e23 1012
bb057878 1013#if defined (__SH4__) || defined (__SH2A__)
fe9c9e23 1014/* This variant is used when FPSCR.PR = 1 (double precision) is the default
1015 setting.
1016 Args in r4 and r5, result in fpul, clobber dr0, dr2. */
1b61190c 1017
/* sdivsi3_i4 (double-precision-default variant): signed r4 / r5 computed
   on the FPU.  Both operands are moved through fpul and converted to
   double (dr0 = dividend, dr2 = divisor), divided, and the quotient is
   converted back to an integer in fpul by ftrc in the rts delay slot.  */
16f1dae0 1018 .global GLOBAL(sdivsi3_i4)
59312820 1019 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
16f1dae0 1020GLOBAL(sdivsi3_i4):
1b61190c 1021 lds r4,fpul
1022 float fpul,dr0
1023 lds r5,fpul
1024 float fpul,dr2
1025 fdiv dr2,dr0
1026 rts
1027 ftrc dr0,fpul
1028
805e22b2 1029 ENDFUNC(GLOBAL(sdivsi3_i4))
fe9c9e23 1030
bb057878 1031#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
fe9c9e23 1032/* This variant is used when FPSCR.PR = 0 (single precision) is the default
1033 setting.
1034 Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
1035 For this to work, we must temporarily switch the FPU to double precision,
1036 but we had better not touch FPSCR.FR. See PR 6526. */
1b61190c 1037
87e19636 1038#if ! __SH5__ || __SH5__ == 32
1039#if __SH5__
1040 .mode SHcompact
1041#endif
/* sdivsi3_i4 (single-precision-default variant): same double-precision
   divide of r4 by r5 with the result in fpul, but FPSCR.PR is switched to
   1 around the FPU sequence and put back before returning.  */
16f1dae0 1042 .global GLOBAL(sdivsi3_i4)
59312820 1043 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
16f1dae0 1044GLOBAL(sdivsi3_i4):
fe9c9e23 1045
/* Non-SH4A: r3 is preserved on the stack and used to build the PR mask;
   r2 keeps the caller's FPSCR so it can be written back unchanged after
   the divide.  r3 is reloaded in the rts delay slot.  */
1046#ifndef __SH4A__
1047 mov.l r3,@-r15
1048 sts fpscr,r2
1049 mov #8,r3
1050 swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit)
1051 or r2,r3
1052 lds r3,fpscr // Set FPSCR.PR = 1.
1053 lds r4,fpul
1054 float fpul,dr0
1055 lds r5,fpul
1056 float fpul,dr2
1057 fdiv dr2,dr0
1058 ftrc dr0,fpul
1059 lds r2,fpscr
1b61190c 1060 rts
fe9c9e23 1061 mov.l @r15+,r3
1062#else
1063/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */
/* The second fpchg, in the rts delay slot, flips PR back; no integer
   register or stack slot is used on this path.  */
1064 fpchg
1065 lds r4,fpul
1066 float fpul,dr0
1067 lds r5,fpul
1068 float fpul,dr2
1069 fdiv dr2,dr0
1070 ftrc dr0,fpul
1071 rts
1072 fpchg
1073
1074#endif /* __SH4A__ */
1b61190c 1075
805e22b2 1076 ENDFUNC(GLOBAL(sdivsi3_i4))
87e19636 1077#endif /* ! __SH5__ || __SH5__ == 32 */
bb057878 1078#endif /* ! __SH4__ || __SH2A__ */
fe9c9e23 1079#endif /* L_sdivsi3_i4 */
1b61190c 1080
fe9c9e23 1081//------------------------------------------------------------------------------
1b61190c 1082#ifdef L_sdivsi3
1083/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
87ed74ef 1084 sh2e/sh3e code. */
a85a2db0 1085!!
1086!! Steve Chamberlain
1087!! sac@cygnus.com
1088!!
1089!!
1090
805e22b2 1091!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
a85a2db0 1092
16f1dae0 1093 .global GLOBAL(sdivsi3)
87e19636 1094#if __SHMEDIA__
1095#if __SH5__ == 32
1096 .section .text..SHmedia32,"ax"
1097#else
1098 .text
1099#endif
1100 .align 2
0c63e844 1101#if 0
87e19636 1102/* The assembly code that follows is a hand-optimized version of the C
1103 code that follows. Note that the registers that are modified are
1104 exactly those listed as clobbered in the patterns divsi3_i1 and
1105 divsi3_i1_media.
1106
1107int __sdivsi3 (i, j)
1108 int i, j;
1109{
1110 register unsigned long long r18 asm ("r18");
1111 register unsigned long long r19 asm ("r19");
1112 register unsigned long long r0 asm ("r0") = 0;
1113 register unsigned long long r1 asm ("r1") = 1;
1114 register int r2 asm ("r2") = i >> 31;
1115 register int r3 asm ("r3") = j >> 31;
1116
1117 r2 = r2 ? r2 : r1;
1118 r3 = r3 ? r3 : r1;
1119 r18 = i * r2;
1120 r19 = j * r3;
1121 r2 *= r3;
1122
1123 r19 <<= 31;
1124 r1 <<= 31;
1125 do
1126 if (r18 >= r19)
1127 r0 |= r1, r18 -= r19;
1128 while (r19 >>= 1, r1 >>= 1);
1129
1130 return r2 * (int)r0;
1131}
1132*/
1133GLOBAL(sdivsi3):
1134 pt/l LOCAL(sdivsi3_dontadd), tr2
1135 pt/l LOCAL(sdivsi3_loop), tr1
1136 ptabs/l r18, tr0
1137 movi 0, r0
1138 movi 1, r1
1139 shari.l r4, 31, r2
1140 shari.l r5, 31, r3
1141 cmveq r2, r1, r2
1142 cmveq r3, r1, r3
1143 muls.l r4, r2, r18
1144 muls.l r5, r3, r19
1145 muls.l r2, r3, r2
1146 shlli r19, 31, r19
1147 shlli r1, 31, r1
1148LOCAL(sdivsi3_loop):
1149 bgtu r19, r18, tr2
1150 or r0, r1, r0
1151 sub r18, r19, r18
1152LOCAL(sdivsi3_dontadd):
1153 shlri r1, 1, r1
1154 shlri r19, 1, r19
1155 bnei r1, 0, tr1
1156 muls.l r0, r2, r0
1157 add.l r0, r63, r0
1158 blink tr0, r63
59312820 1159#elif 0 /* ! 0 */
/* Reciprocal-based signed 32 bit division.  Both operands are made
   non-negative by multiplying with their own sign (+1 / -1), the quotient
   is accumulated in r19 over several multiply / subtract correction steps,
   and the sign of r4 ^ r5 is applied at the very end.  */
0c63e844 1160 // inputs: r4,r5
1161 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1162 // result in r0
1163GLOBAL(sdivsi3):
1164 // can create absolute value without extra latency,
1165 // but dependent on proper sign extension of inputs:
1166 // shari.l r5,31,r2
1167 // xor r5,r2,r20
1168 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
/* r2 = (r5 >> 31) | 1 is +1 or -1, so the multiply yields |r5|.  */
1169 shari.l r5,31,r2
1170 ori r2,1,r2
1171 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1172 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1173 shari.l r4,31,r3
1174 nsb r20,r0
1175 shlld r20,r0,r25
1176 shlri r25,48,r25
1177 sub r19,r25,r1
1178 mmulfx.w r1,r1,r2
1179 mshflo.w r1,r63,r1
1180 // If r4 was to be used in-place instead of r21, could use this sequence
1181 // to compute absolute:
1182 // sub r63,r4,r19 // compute absolute value of r4
1183 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1184 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1185 ori r3,1,r3
1186 mmulfx.w r25,r2,r2
1187 sub r19,r0,r0
/* r21 = r4 * (+1 / -1), i.e. the dividend made non-negative.  */
1188 muls.l r4,r3,r21
1189 msub.w r1,r2,r2
1190 addi r2,-2,r1
1191 mulu.l r21,r1,r19
1192 mmulfx.w r2,r2,r2
1193 shlli r1,15,r1
1194 shlrd r19,r0,r19
1195 mulu.l r19,r20,r3
1196 mmacnfx.wl r25,r2,r1
1197 ptabs r18,tr0
1198 sub r21,r3,r25
1199
1200 mulu.l r25,r1,r2
1201 addi r0,14,r0
/* r18 becomes 0 or -1 depending on whether the operand signs differ;
   the return address was already copied to tr0 above.  */
1202 xor r4,r5,r18
1203 shlrd r2,r0,r2
1204 mulu.l r2,r20,r3
1205 add r19,r2,r19
1206 shari.l r18,31,r18
1207 sub r25,r3,r25
1208
1209 mulu.l r25,r1,r2
1210 sub r25,r20,r25
1211 add r19,r18,r19
1212 shlrd r2,r0,r2
1213 mulu.l r2,r20,r3
1214 addi r25,1,r25
1215 add r19,r2,r19
1216
/* Final correction bit, then (q + r18) ^ r18 negates q when r18 is -1.  */
1217 cmpgt r25,r3,r25
1218 add.l r19,r25,r0
1219 xor r0,r18,r0
1220 blink tr0,r63
59312820 1221#else /* ! 0 && ! 0 */
1222
1223 // inputs: r4,r5
1224 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1225 // result in r0
/* Table-driven signed division.  In non-PIC builds the entry points
   sdivsi3 / sdivsi3_1 load the address of div_table_internal into r20 and
   fall through into sdivsi3_2; in PIC builds only sdivsi3_2 is emitted and
   r20 must already hold the table address on entry.  */
1226 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1227#ifndef __pic__
1228 FUNC(GLOBAL(sdivsi3))
1229GLOBAL(sdivsi3): /* this is the shcompact entry point */
1230 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1231 // with the SHcompact implementation, which clobbers tr1 / tr2.
1232 .global GLOBAL(sdivsi3_1)
1233GLOBAL(sdivsi3_1):
1234 .global GLOBAL(div_table_internal)
/* Build the table address in r20 from its two 16 bit halves.  */
1235 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1236 shori GLOBAL(div_table_internal) & 65535, r20
1237#endif
1238 .global GLOBAL(sdivsi3_2)
1239 // div_table in r20
1240 // clobbered: r1,r18,r19,r21,r25,tr0
1241GLOBAL(sdivsi3_2):
1242 nsb r5, r1
1243 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1244 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1245 ldx.ub r20, r21, r19 // u0.8
1246 shari r25, 32, r25 // normalize to s2.30
1247 shlli r21, 1, r21
1248 muls.l r25, r19, r19 // s2.38
1249 ldx.w r20, r21, r21 // s2.14
/* Save the return address in tr0; r18 is reused as a temporary below.  */
1250 ptabs r18, tr0
1251 shari r19, 24, r19 // truncate to s2.14
1252 sub r21, r19, r19 // some 11 bit inverse in s1.14
1253 muls.l r19, r19, r21 // u0.28
/* r1 = 92 - nsb(r5): the final right-shift count used by 'shard' below.  */
1254 sub r63, r1, r1
1255 addi r1, 92, r1
1256 muls.l r25, r21, r18 // s2.58
1257 shlli r19, 45, r19 // multiply by two and convert to s2.58
1258 /* bubble */
1259 sub r19, r18, r18
1260 shari r18, 28, r18 // some 22 bit inverse in s1.30
1261 muls.l r18, r25, r0 // s2.60
1262 muls.l r18, r4, r25 // s32.30
1263 /* bubble */
1264 shari r0, 16, r19 // s-16.44
1265 muls.l r19, r18, r19 // s-16.74
/* r0 = 0 or -1: the sign of the dividend * inverse product in r25.  */
1266 shari r25, 63, r0
1267 shari r4, 14, r18 // s19.-14
1268 shari r19, 30, r19 // s-16.44
1269 muls.l r19, r18, r19 // s15.30
1270 xor r21, r0, r21 // You could also use the constant 1 << 27.
1271 add r21, r25, r21
1272 sub r21, r19, r21
1273 shard r21, r1, r21
1274 sub r21, r0, r0
1275 blink tr0, r63
1276#ifndef __pic__
1277 ENDFUNC(GLOBAL(sdivsi3))
1278#endif
1279 ENDFUNC(GLOBAL(sdivsi3_2))
0c63e844 1280#endif
273fffd6 1281#elif __SHMEDIA__
0c63e844 1282/* m5compact-nofpu */
/* Compact 32-iteration restoring division loop: r20 = |r4|, r21 = |r5|,
   r19 = sign (0 / -1) of the result.  The divisor is kept as
   (|r5| << 32) - 1, so one subtract both removes the divisor from the upper
   half of r20 and sets a quotient bit in the lower half.
   NOTE(review): this branch is selected by '#elif __SHMEDIA__' after an
   earlier '#if __SHMEDIA__' on the same conditional chain -- confirm that
   it is reachable as intended.  */
1283 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1284 .mode SHmedia
1285 .section .text..SHmedia32,"ax"
1286 .align 2
59312820 1287 FUNC(GLOBAL(sdivsi3))
0c63e844 1288GLOBAL(sdivsi3):
1289 pt/l LOCAL(sdivsi3_dontsub), tr0
1290 pt/l LOCAL(sdivsi3_loop), tr1
1291 ptabs/l r18,tr2
/* r18 / r19 = 0 or -1 according to the signs of r4 / r5.  */
1292 shari.l r4,31,r18
1293 shari.l r5,31,r19
/* (x ^ s) - s gives the absolute value when s is the 0 / -1 sign mask.  */
1294 xor r4,r18,r20
1295 xor r5,r19,r21
1296 sub.l r20,r18,r20
1297 sub.l r21,r19,r21
1298 xor r18,r19,r19
/* r25 = divisor << 32; its low 32 bits (zero here) double as the loop
   counter that addi.l counts down to -32.  */
1299 shlli r21,32,r25
1300 addi r25,-1,r21
1301 addz.l r20,r63,r20
1302LOCAL(sdivsi3_loop):
1303 shlli r20,1,r20
1304 bgeu/u r21,r20,tr0
1305 sub r20,r21,r20
1306LOCAL(sdivsi3_dontsub):
1307 addi.l r25,-1,r25
1308 bnei r25,-32,tr1
/* Apply the result sign to the low 32 bits (the quotient).  */
1309 xor r20,r19,r20
1310 sub.l r20,r19,r0
1311 blink tr2,r63
59312820 1312 ENDFUNC(GLOBAL(sdivsi3))
0c63e844 1313#else /* ! __SHMEDIA__ */
/* Signed 32 bit division for SHcompact using the div0s / div1 step
   instructions.  r1 holds the dividend (and collects the quotient bits via
   rotcl), r0 the divisor, r3 the sign extension of the dividend.  A zero
   divisor branches to div0, which returns 0.  */
59312820 1314 FUNC(GLOBAL(sdivsi3))
16f1dae0 1315GLOBAL(sdivsi3):
a85a2db0 1316 mov r4,r1
1317 mov r5,r0
30b0bb86 1318
a85a2db0 1319 tst r0,r0
1320 bt div0
/* With r2 = 0, div0s leaves the sign of the dividend in T.  subc r3,r3
   turns that into 0 / -1 in r3, and subc r2,r1 subtracts the same bit from
   the dividend.  */
1321 mov #0,r2
1322 div0s r2,r1
1323 subc r3,r3
1324 subc r2,r1
/* Set up the signed division proper, then 32 rotcl / div1 steps.  */
1325 div0s r0,r3
1326 rotcl r1
1327 div1 r0,r3
1328 rotcl r1
1329 div1 r0,r3
1330 rotcl r1
1331 div1 r0,r3
1332 rotcl r1
1333 div1 r0,r3
1334 rotcl r1
1335 div1 r0,r3
1336 rotcl r1
1337 div1 r0,r3
1338 rotcl r1
1339 div1 r0,r3
1340 rotcl r1
1341 div1 r0,r3
1342 rotcl r1
1343 div1 r0,r3
1344 rotcl r1
1345 div1 r0,r3
1346 rotcl r1
1347 div1 r0,r3
1348 rotcl r1
1349 div1 r0,r3
1350 rotcl r1
1351 div1 r0,r3
1352 rotcl r1
1353 div1 r0,r3
1354 rotcl r1
1355 div1 r0,r3
1356 rotcl r1
1357 div1 r0,r3
1358 rotcl r1
1359 div1 r0,r3
1360 rotcl r1
1361 div1 r0,r3
1362 rotcl r1
1363 div1 r0,r3
1364 rotcl r1
1365 div1 r0,r3
1366 rotcl r1
1367 div1 r0,r3
1368 rotcl r1
1369 div1 r0,r3
1370 rotcl r1
1371 div1 r0,r3
1372 rotcl r1
1373 div1 r0,r3
1374 rotcl r1
1375 div1 r0,r3
1376 rotcl r1
1377 div1 r0,r3
1378 rotcl r1
1379 div1 r0,r3
1380 rotcl r1
1381 div1 r0,r3
1382 rotcl r1
1383 div1 r0,r3
1384 rotcl r1
1385 div1 r0,r3
1386 rotcl r1
1387 div1 r0,r3
1388 rotcl r1
1389 div1 r0,r3
/* Shift in the last quotient bit, then add the remaining T bit as the
   final correction.  */
1390 rotcl r1
1391 addc r2,r1
30b0bb86 1392 rts
/* Delay slot: move the quotient to the return register.  */
a85a2db0 1393 mov r1,r0
1394
30b0bb86 1395
/* Division by zero: return 0 (set in the rts delay slot).  */
a85a2db0 1396div0: rts
1397 mov #0,r0
1398
805e22b2 1399 ENDFUNC(GLOBAL(sdivsi3))
fe9c9e23 1400#endif /* ! __SHMEDIA__ */
1401#endif /* L_sdivsi3 */
1402
1403/*------------------------------------------------------------------------------
1404 32 bit unsigned integer division that uses FPU double precision division. */
a85a2db0 1405
fe9c9e23 1406#ifdef L_udivsi3_i4
a85a2db0 1407 .title "SH DIVIDE"
fe9c9e23 1408
bb057878 1409#if defined (__SH4__) || defined (__SH2A__)
fe9c9e23 1410/* This variant is used when FPSCR.PR = 1 (double precision) is the default
1411 setting.
1412 Args in r4 and r5, result in fpul,
1413 clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */
1b61190c 1414
/* Unsigned 32 bit division through the FPU.  'float' converts a signed
   integer, so both operands are first biased by flipping bit 31
   (xor with 1 << 31) and the bias is undone in floating point by adding
   the constant 2147483648.0 stored at L1.  Divisors of 0 or 1 take the
   'trivial' exit, which returns the dividend unchanged in fpul.  */
16f1dae0 1415 .global GLOBAL(udivsi3_i4)
59312820 1416 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
16f1dae0 1417GLOBAL(udivsi3_i4):
fe9c9e23 1418 mov #1,r1
1419 cmp/hi r1,r5
1420 bf/s trivial
/* Delay slot: rotating 1 right by one bit gives r1 = 1 << 31.  */
1421 rotr r1
1422 xor r1,r4
1423 lds r4,fpul
/* r0 = address of the double constant; loaded into dr4 below.  */
1424 mova L1,r0
1b61190c 1425#ifdef FMOVD_WORKS
fe9c9e23 1426 fmov.d @r0+,dr4
1b61190c 1427#else
fe9c9e23 1428 fmov.s @r0+,DR40
1429 fmov.s @r0,DR41
1b61190c 1430#endif
fe9c9e23 1431 float fpul,dr0
1432 xor r1,r5
1433 lds r5,fpul
1434 float fpul,dr2
/* Undo the 1 << 31 bias on dividend (dr0) and divisor (dr2).  */
1435 fadd dr4,dr0
1436 fadd dr4,dr2
1437 fdiv dr2,dr0
1b61190c 1438 rts
/* Delay slot: truncate the quotient to an integer in fpul.  */
fe9c9e23 1439 ftrc dr0,fpul
1b61190c 1440
1441trivial:
1442 rts
fe9c9e23 1443 lds r4,fpul
1b61190c 1444
1445 .align 2
6a807829 1446#ifdef FMOVD_WORKS
fe9c9e23 1447 .align 3 // Make the double below 8 byte aligned.
6a807829 1448#endif
1b61190c 1449L1:
1450 .double 2147483648
1451
805e22b2 1452 ENDFUNC(GLOBAL(udivsi3_i4))
fe9c9e23 1453
bb057878 1454#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
de0cf984 1455#if ! __SH5__ || __SH5__ == 32
1456!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
/* SHmedia version: both operands are zero-extended to 64 bits, converted
   to double, divided, and the quotient is truncated back to an integer.  */
1457 .mode SHmedia
1458 .global GLOBAL(udivsi3_i4)
59312820 1459 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
de0cf984 1460GLOBAL(udivsi3_i4):
/* addz.l with r63 (always zero) zero-extends the 32 bit operand.  */
1461 addz.l r4,r63,r20
1462 addz.l r5,r63,r21
1463 fmov.qd r20,dr0
1464 fmov.qd r21,dr32
1465 ptabs r18,tr0
1466 float.qd dr0,dr0
1467 float.qd dr32,dr32
1468 fdiv.d dr0,dr32,dr0
1469 ftrc.dq dr0,dr32
/* Copy one half of the 64 bit result in dr32 (fr33) into fr32;
   presumably fr32 is where the SHcompact caller sees fpul -- confirm.  */
1470 fmov.s fr33,fr32
1471 blink tr0,r63
805e22b2 1472
1473 ENDFUNC(GLOBAL(udivsi3_i4))
de0cf984 1474#endif /* ! __SH5__ || __SH5__ == 32 */
fe9c9e23 1475
bb057878 1476#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
fe9c9e23 1477/* This variant is used when FPSCR.PR = 0 (single precision) is the default
1478 setting.
1479 Args in r4 and r5, result in fpul,
1480 clobber r0, r1, r4, r5, dr0, dr2, dr4.
1481 For this to work, we must temporarily switch the FPU to double precision,
1482 but we had better not touch FPSCR.FR. See PR 6526. */
1b61190c 1483
/* Unsigned 32 bit division through the FPU for configurations whose
   default FPSCR setting is not double precision.  The operands are biased
   by 1 << 31 so that the signed 'float' conversion can be used, and the
   bias is removed again by adding 2147483648.0 (L1).  Divisors of 0 or 1
   take the 'trivial' exit before FPSCR is touched and return the dividend
   in fpul.  */
16f1dae0 1484 .global GLOBAL(udivsi3_i4)
59312820 1485 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
16f1dae0 1486GLOBAL(udivsi3_i4):
fe9c9e23 1487
1488#ifndef __SH4A__
1489 mov #1,r1
1490 cmp/hi r1,r5
1491 bf/s trivial
1492 rotr r1 // r1 = 1 << 31
/* Save FPSCR on the stack; it is restored in the rts delay slot.  */
1493 sts.l fpscr,@-r15
1494 xor r1,r4
/* Read the saved FPSCR back and OR in the mode bits from L2.  r1 is
   reloaded only after its last use as the 1 << 31 bias.  */
1495 mov.l @(0,r15),r0
1496 xor r1,r5
1497 mov.l L2,r1
1498 lds r4,fpul
1499 or r0,r1
1500 mova L1,r0
1501 lds r1,fpscr
1b61190c 1502#ifdef FMOVD_WORKS
fe9c9e23 1503 fmov.d @r0+,dr4
1b61190c 1504#else
fe9c9e23 1505 fmov.s @r0+,DR40
1506 fmov.s @r0,DR41
1b61190c 1507#endif
fe9c9e23 1508 float fpul,dr0
1509 lds r5,fpul
1510 float fpul,dr2
1511 fadd dr4,dr0
1512 fadd dr4,dr2
1513 fdiv dr2,dr0
1514 ftrc dr0,fpul
1b61190c 1515 rts
/* Delay slot: pop the caller's FPSCR.  */
fe9c9e23 1516 lds.l @r15+,fpscr
1b61190c 1517
6a807829 1518#ifdef FMOVD_WORKS
fe9c9e23 1519 .align 3 // Make the double below 8 byte aligned.
6a807829 1520#endif
/* 'trivial' (two instructions) plus the L2 word together occupy 8 bytes,
   so L1 keeps the 8 byte alignment established above.  */
1b61190c 1521trivial:
1522 rts
fe9c9e23 1523 lds r4,fpul
1b61190c 1524
1525 .align 2
fe9c9e23 1526L2:
1527#ifdef FMOVD_WORKS
1528 .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1
1b61190c 1529#else
fe9c9e23 1530 .long 0x80000 // FPSCR.PR = 1
1b61190c 1531#endif
fe9c9e23 1532L1:
1b61190c 1533 .double 2147483648
1534
fe9c9e23 1535#else
1536/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
1537 Although on SH4A fmovd usually works, it would require either additional
1538 two fschg instructions or an FPSCR push + pop. It's not worth the effort
1539 for loading only one double constant. */
1540 mov #1,r1
1541 cmp/hi r1,r5
1542 bf/s trivial
1543 rotr r1 // r1 = 1 << 31
/* Flip FPSCR.PR for the duration of the computation; flipped back in the
   rts delay slot below.  */
1544 fpchg
1545 mova L1,r0
1546 xor r1,r4
1547 fmov.s @r0+,DR40
1548 lds r4,fpul
1549 fmov.s @r0,DR41
1550 xor r1,r5
1551 float fpul,dr0
1552 lds r5,fpul
1553 float fpul,dr2
1554 fadd dr4,dr0
1555 fadd dr4,dr2
1556 fdiv dr2,dr0
1557 ftrc dr0,fpul
1558 rts
1559 fpchg
1560
1561trivial:
1562 rts
1563 lds r4,fpul
1564
1565 .align 2
1566L1:
1567 .double 2147483648
1568
1569#endif /* __SH4A__ */
1570
1571
805e22b2 1572 ENDFUNC(GLOBAL(udivsi3_i4))
1b61190c 1573#endif /* ! __SH4__ */
fe9c9e23 1574#endif /* L_udivsi3_i4 */
1b61190c 1575
1576#ifdef L_udivsi3
1577/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
87ed74ef 1578 sh2e/sh3e code. */
a85a2db0 1579
570d13b9 1580!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
16f1dae0 1581 .global GLOBAL(udivsi3)
59312820 1582 HIDDEN_FUNC(GLOBAL(udivsi3))
a85a2db0 1583
87e19636 1584#if __SHMEDIA__
1585#if __SH5__ == 32
1586 .section .text..SHmedia32,"ax"
1587#else
1588 .text
1589#endif
1590 .align 2
0c63e844 1591#if 0
87e19636 1592/* The assembly code that follows is a hand-optimized version of the C
1593 code that follows. Note that the registers that are modified are
1594 exactly those listed as clobbered in the patterns udivsi3_i1 and
1595 udivsi3_i1_media.
1596
1597unsigned
1598__udivsi3 (i, j)
1599 unsigned i, j;
1600{
1601 register unsigned long long r0 asm ("r0") = 0;
1602 register unsigned long long r18 asm ("r18") = 1;
1603 register unsigned long long r4 asm ("r4") = i;
1604 register unsigned long long r19 asm ("r19") = j;
1605
1606 r19 <<= 31;
1607 r18 <<= 31;
1608 do
1609 if (r4 >= r19)
1610 r0 |= r18, r4 -= r19;
1611 while (r19 >>= 1, r18 >>= 1);
1612
1613 return r0;
1614}
1615*/
/* Shift-and-subtract unsigned division following the C reference just
   above: r0 = quotient, r18 = current quotient bit, r4 = running rest,
   r19 = divisor << 31.  The return address in r18 is moved to tr0 before
   r18 is reused.  */
1616GLOBAL(udivsi3):
1617 pt/l LOCAL(udivsi3_dontadd), tr2
1618 pt/l LOCAL(udivsi3_loop), tr1
1619 ptabs/l r18, tr0
1620 movi 0, r0
1621 movi 1, r18
/* Zero-extend both 32 bit operands to 64 bits.  */
1622 addz.l r5, r63, r19
1623 addz.l r4, r63, r4
1624 shlli r19, 31, r19
1625 shlli r18, 31, r18
1626LOCAL(udivsi3_loop):
/* Skip the subtract when the shifted divisor exceeds the running rest.  */
1627 bgtu r19, r4, tr2
1628 or r0, r18, r0
1629 sub r4, r19, r4
1630LOCAL(udivsi3_dontadd):
1631 shlri r18, 1, r18
1632 shlri r19, 1, r19
/* Loop until the quotient bit has been shifted out.  */
1633 bnei r18, 0, tr1
1634 blink tr0, r63
1635#else
/* Reciprocal-based unsigned 32 bit division.  r22 holds the zero-extended
   divisor, r18 accumulates the quotient over three multiply / subtract
   correction steps, r25 tracks the rest, and a last compare adds the final
   correction bit.  */
16f1dae0 1636GLOBAL(udivsi3):
0c63e844 1637 // inputs: r4,r5
1638 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1639 // result in r0.
1640 addz.l r5,r63,r22
1641 nsb r22,r0
1642 shlld r22,r0,r25
1643 shlri r25,48,r25
1644 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1645 sub r20,r25,r21
1646 mmulfx.w r21,r21,r19
1647 mshflo.w r21,r63,r21
/* Save the return address in tr0; r18 is reused for the quotient below.  */
1648 ptabs r18,tr0
1649 mmulfx.w r25,r19,r19
1650 sub r20,r0,r0
1651 /* bubble */
1652 msub.w r21,r19,r19
1653 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1654 before the msub.w, but we need a different value for
1655 r19 to keep errors under control. */
/* First quotient estimate in r18; r25 = dividend - estimate * divisor.  */
1656 mulu.l r4,r21,r18
1657 mmulfx.w r19,r19,r19
1658 shlli r21,15,r21
1659 shlrd r18,r0,r18
1660 mulu.l r18,r22,r20
1661 mmacnfx.wl r25,r19,r21
1662 /* bubble */
1663 sub r4,r20,r25
1664
/* Second step: estimate the quotient of the rest and add it to r18.  */
1665 mulu.l r25,r21,r19
1666 addi r0,14,r0
1667 /* bubble */
1668 shlrd r19,r0,r19
1669 mulu.l r19,r22,r20
1670 add r18,r19,r18
1671 /* bubble */
1672 sub.l r25,r20,r25
1673
/* Third step, same pattern.  */
1674 mulu.l r25,r21,r19
1675 addz.l r25,r63,r25
1676 sub r25,r22,r25
1677 shlrd r19,r0,r19
1678 mulu.l r19,r22,r20
1679 addi r25,1,r25
1680 add r18,r19,r18
1681
/* cmpgt yields 0 or 1: the last correction added to the quotient.  */
1682 cmpgt r25,r20,r25
1683 add.l r18,r25,r0
1684 blink tr0,r63
1685#endif
273fffd6 1686#elif __SHMEDIA__
0c63e844 1687/* m5compact-nofpu - more emphasis on code size than on speed, but don't
1688 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1689 So use a short shmedia loop. */
/* 32-iteration restoring division: r20 holds the rest in its upper half
   and collects quotient bits in its lower half; the divisor is kept as
   (r5 << 32) - 1 so one subtract does both updates.
   NOTE(review): this branch is selected by '#elif __SHMEDIA__' after an
   earlier '#if __SHMEDIA__' on the same conditional chain -- confirm that
   it is reachable as intended.  */
1690 // clobbered: r20,r21,r25,tr0,tr1,tr2
1691 .mode SHmedia
1692 .section .text..SHmedia32,"ax"
1693 .align 2
1694GLOBAL(udivsi3):
1695 pt/l LOCAL(udivsi3_dontsub), tr0
1696 pt/l LOCAL(udivsi3_loop), tr1
1697 ptabs/l r18,tr2
/* r25 = divisor << 32; its low 32 bits (zero here) double as the loop
   counter that addi.l counts down to -32.  */
1698 shlli r5,32,r25
1699 addi r25,-1,r21
1700 addz.l r4,r63,r20
1701LOCAL(udivsi3_loop):
1702 shlli r20,1,r20
1703 bgeu/u r21,r20,tr0
1704 sub r20,r21,r20
1705LOCAL(udivsi3_dontsub):
1706 addi.l r25,-1,r25
1707 bnei r25,-32,tr1
/* The quotient is the low 32 bits of r20.  */
1708 add.l r20,r63,r0
1709 blink tr2,r63
273fffd6 1710#else /* ! __SHMEDIA__ */
/* Helpers for the small-divisor path: LOCAL(div8) executes eight div1
   steps and LOCAL(div7) seven, the last one in the rts delay slot.  */
0c63e844 1711LOCAL(div8):
1712 div1 r5,r4
1713LOCAL(div7):
1714 div1 r5,r4; div1 r5,r4; div1 r5,r4
1715 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1716
/* Helper for the large-divisor path: four div1 steps, the first three
   each followed by a rotcl that shifts a quotient bit into r0.  */
1717LOCAL(divx4):
1718 div1 r5,r4; rotcl r0
1719 div1 r5,r4; rotcl r0
1720 div1 r5,r4; rotcl r0
1721 rts; div1 r5,r4
1722
/* Unsigned 32 bit division for SHcompact.  pr is saved on the stack
   because the helpers above are called with bsr.  If the divisor fits in
   16 bits (extu.w r5 == r5) the division is done as two 16-step halves;
   otherwise the large_divisor path is used.  */
1723GLOBAL(udivsi3):
1724 sts.l pr,@-r15
1725 extu.w r5,r0
1726 cmp/eq r5,r0
/* The __sh1__ build uses plain bf, so the div0u that follows runs only on
   the fall-through path and is repeated at large_divisor.  */
1727#ifdef __sh1__
1728 bf LOCAL(large_divisor)
1729#else
1730 bf/s LOCAL(large_divisor)
1731#endif
1732 div0u
1733 swap.w r4,r0
1734 shlr16 r4
1735 bsr LOCAL(div8)
/* Delay slot: move the 16 bit divisor into the upper half for div1.  */
1736 shll16 r5
1737 bsr LOCAL(div7)
1738 div1 r5,r4
1739 xtrct r4,r0
1740 xtrct r0,r4
1741 bsr LOCAL(div8)
1742 swap.w r4,r4
1743 bsr LOCAL(div7)
1744 div1 r5,r4
1745 lds.l @r15+,pr
1746 xtrct r4,r0
1747 swap.w r0,r0
1748 rotcl r0
1749 rts
/* Delay slot: shift the divisor back down to its original position.  */
1750 shlr16 r5
1751
1752LOCAL(large_divisor):
1753#ifdef __sh1__
1754 div0u
1755#endif
1756 mov #0,r0
1757 xtrct r4,r0
1758 xtrct r0,r4
/* Four calls to divx4, each with a rotcl in the bsr delay slot.  */
1759 bsr LOCAL(divx4)
1760 rotcl r0
1761 bsr LOCAL(divx4)
1762 rotcl r0
1763 bsr LOCAL(divx4)
1764 rotcl r0
1765 bsr LOCAL(divx4)
1766 rotcl r0
1767 lds.l @r15+,pr
1768 rts
/* Delay slot: shift in the last quotient bit.  */
1769 rotcl r0
a85a2db0 1770
805e22b2 1771 ENDFUNC(GLOBAL(udivsi3))
87e19636 1772#endif /* ! __SHMEDIA__ */
0c63e844 1773#endif /* L_udivsi3 */
1774
1775#ifdef L_udivdi3
273fffd6 1776#if __SHMEDIA__
0c63e844 1777 .mode SHmedia
1778 .section .text..SHmedia32,"ax"
1779 .align 2
/* Unsigned 64 bit division for SHmedia: dividend in r2, divisor in r3,
   quotient returned in r2.  The same entry point is also made available
   under the hidden alias udivdi3_internal.  An approximate reciprocal of
   the normalized divisor is built in r1 and the quotient is then produced
   in a few multiply / subtract steps; the 'bgt/u' below selects the
   large_divisor path (presumably for divisors that need more than 32
   bits -- confirm).  */
1780 .global GLOBAL(udivdi3)
619f47f5 1781 FUNC(GLOBAL(udivdi3))
0c63e844 1782GLOBAL(udivdi3):
59312820 1783 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
/* Normalize the divisor: r22 = shift count, r6 = shifted divisor.  */
0c63e844 1784 shlri r3,1,r4
1785 nsb r4,r22
1786 shlld r3,r22,r6
1787 shlri r6,49,r5
1788 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1789 sub r21,r5,r1
1790 mmulfx.w r1,r1,r4
1791 mshflo.w r1,r63,r1
1792 sub r63,r22,r20 // r63 == 64 % 64
1793 mmulfx.w r5,r4,r4
1794 pta LOCAL(large_divisor),tr0
1795 addi r20,32,r9
1796 msub.w r1,r4,r1
1797 madd.w r1,r1,r1
1798 mmulfx.w r1,r1,r4
1799 shlri r6,32,r7
1800 bgt/u r9,r63,tr0 // large_divisor
1801 mmulfx.w r5,r4,r4
79c2c2aa 1802 shlri r2,32+14,r19
1803 addi r22,-31,r0
0c63e844 1804 msub.w r1,r4,r1
1805
1806 mulu.l r1,r7,r4
1807 addi r1,-3,r5
1808 mulu.l r5,r19,r5
79c2c2aa 1809 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
0c63e844 1810 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1811 the case may be, %0000000000000000 000.11111111111, still */
1812 muls.l r1,r4,r4 /* leaving at least one sign bit. */
79c2c2aa 1813 mulu.l r5,r3,r8
0c63e844 1814 mshalds.l r1,r21,r1
1815 shari r4,26,r4
79c2c2aa 1816 shlld r8,r0,r8
1817 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1818 sub r2,r8,r2
0c63e844 1819 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1820
1821 shlri r2,22,r21
1822 mulu.l r21,r1,r21
/* r8 now starts accumulating the quotient (first partial result).  */
79c2c2aa 1823 shlld r5,r0,r8
0c63e844 1824 addi r20,30-22,r0
0c63e844 1825 shlrd r21,r0,r21
1826 mulu.l r21,r3,r5
1827 add r8,r21,r8
79c2c2aa 1828 mcmpgt.l r21,r63,r21 // See Note 1
0c63e844 1829 addi r20,30,r0
1830 mshfhi.l r63,r21,r21
1831 sub r2,r5,r2
1832 andc r2,r21,r2
1833
1834 /* small divisor: need a third divide step */
1835 mulu.l r2,r1,r7
1836 ptabs r18,tr0
1837 addi r2,1,r2
1838 shlrd r7,r0,r7
1839 mulu.l r7,r3,r5
1840 add r8,r7,r8
1841 sub r2,r3,r2
/* cmpgt yields 0 or 1: the last correction added to the quotient.  */
1842 cmpgt r2,r5,r5
1843 add r8,r5,r2
1844 /* could test r3 here to check for divide by zero. */
1845 blink tr0,r63
1846
1847LOCAL(large_divisor):
1848 mmulfx.w r5,r4,r4
1849 shlrd r2,r9,r25
1850 shlri r25,32,r8
1851 msub.w r1,r4,r1
1852
1853 mulu.l r1,r7,r4
1854 addi r1,-3,r5
1855 mulu.l r5,r8,r5
79c2c2aa 1856 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
0c63e844 1857 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1858 the case may be, %0000000000000000 000.11111111111, still */
1859 muls.l r1,r4,r4 /* leaving at least one sign bit. */
79c2c2aa 1860 shlri r5,14-1,r8
0c63e844 1861 mulu.l r8,r7,r5
1862 mshalds.l r1,r21,r1
1863 shari r4,26,r4
79c2c2aa 1864 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
0c63e844 1865 sub r25,r5,r25
1866 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1867
1868 shlri r25,22,r21
1869 mulu.l r21,r1,r21
1870 pta LOCAL(no_lo_adj),tr0
1871 addi r22,32,r0
1872 shlri r21,40,r21
1873 mulu.l r21,r7,r5
1874 add r8,r21,r8
1875 shlld r2,r0,r2
1876 sub r25,r5,r25
/* If the rest is still >= r7, bump the quotient and subtract once more.  */
79c2c2aa 1877 bgtu/u r7,r25,tr0 // no_lo_adj
0c63e844 1878 addi r8,1,r8
79c2c2aa 1879 sub r25,r7,r25
0c63e844 1880LOCAL(no_lo_adj):
79c2c2aa 1881 mextr4 r2,r25,r2
0c63e844 1882
1883 /* large_divisor: only needs a few adjustments. */
1884 mulu.l r8,r6,r5
1885 ptabs r18,tr0
1886 /* bubble */
/* cmpgtu yields 0 or 1: subtracted from the quotient estimate in r8.  */
1887 cmpgtu r5,r2,r5
1888 sub r8,r5,r2
1889 blink tr0,r63
619f47f5 1890 ENDFUNC(GLOBAL(udivdi3))
0c63e844 1891/* Note 1: To shift the result of the second divide stage so that the result
1892 always fits into 32 bits, yet we still reduce the rest sufficiently
1893 would require a lot of instructions to do the shifts just right. Using
1894 the full 64 bit shift result to multiply with the divisor would require
1895 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
808a491c 1896 Fortunately, if the upper 32 bits of the shift result are nonzero, we
0c63e844 1897 know that the rest after taking this partial result into account will
1898 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
808a491c 1899 upper 32 bits of the partial result are nonzero. */
0c63e844 1900#endif /* __SHMEDIA__ */
1901#endif /* L_udivdi3 */
1902
1903#ifdef L_divdi3
273fffd6 1904#if __SHMEDIA__
0c63e844 1905 .mode SHmedia
1906 .section .text..SHmedia32,"ax"
1907 .align 2
/* Signed 64 bit division for SHmedia: operands in r2 / r3, result in r2.
   Both operands are replaced by their absolute values and the work is done
   by udivdi3_internal.  When the signs are equal that routine is entered
   with a plain branch, so it returns straight to our caller; otherwise it
   is called and its result is negated before returning.  */
1908 .global GLOBAL(divdi3)
619f47f5 1909 FUNC(GLOBAL(divdi3))
0c63e844 1910GLOBAL(divdi3):
59312820 1911 pta GLOBAL(udivdi3_internal),tr0
/* r22 / r23 = 0 or -1 according to the signs of r2 / r3.  */
0c63e844 1912 shari r2,63,r22
1913 shari r3,63,r23
/* (x ^ s) - s gives the absolute value when s is the 0 / -1 sign mask.  */
1914 xor r2,r22,r2
1915 xor r3,r23,r3
1916 sub r2,r22,r2
1917 sub r3,r23,r3
/* Same signs: tail-branch, r18 still holds the caller's return address.  */
1918 beq/u r22,r23,tr0
/* Different signs: keep our return address in tr1, call with the link
   written to r18, then negate the quotient.  */
1919 ptabs r18,tr1
1920 blink tr0,r18
1921 sub r63,r2,r2
1922 blink tr1,r63
619f47f5 1923 ENDFUNC(GLOBAL(divdi3))
0c63e844 1924#endif /* __SHMEDIA__ */
1925#endif /* L_divdi3 */
1926
1927#ifdef L_umoddi3
273fffd6 1928#if __SHMEDIA__
0c63e844 1929 .mode SHmedia
1930 .section .text..SHmedia32,"ax"
1931 .align 2
/* Unsigned 64 bit remainder for SHmedia: dividend in r2, divisor in r3,
   remainder returned in r2.  The same entry point is also made available
   under the hidden alias umoddi3_internal.  The structure mirrors the
   reciprocal-based division: an approximate reciprocal of the normalized
   divisor is built in r1, and the rest is reduced in a few multiply /
   subtract steps.  The 'bgt/u' below selects the large_divisor path
   (presumably for divisors that need more than 32 bits -- confirm).  */
1932 .global GLOBAL(umoddi3)
619f47f5 1933 FUNC(GLOBAL(umoddi3))
0c63e844 1934GLOBAL(umoddi3):
59312820 1935 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
/* Normalize the divisor: r22 = shift count, r6 = shifted divisor.  */
0c63e844 1936 shlri r3,1,r4
1937 nsb r4,r22
1938 shlld r3,r22,r6
1939 shlri r6,49,r5
1940 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1941 sub r21,r5,r1
1942 mmulfx.w r1,r1,r4
1943 mshflo.w r1,r63,r1
1944 sub r63,r22,r20 // r63 == 64 % 64
1945 mmulfx.w r5,r4,r4
1946 pta LOCAL(large_divisor),tr0
1947 addi r20,32,r9
1948 msub.w r1,r4,r1
1949 madd.w r1,r1,r1
1950 mmulfx.w r1,r1,r4
1951 shlri r6,32,r7
1952 bgt/u r9,r63,tr0 // large_divisor
1953 mmulfx.w r5,r4,r4
79c2c2aa 1954 shlri r2,32+14,r19
1955 addi r22,-31,r0
0c63e844 1956 msub.w r1,r4,r1
1957
1958 mulu.l r1,r7,r4
1959 addi r1,-3,r5
1960 mulu.l r5,r19,r5
79c2c2aa 1961 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
0c63e844 1962 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1963 the case may be, %0000000000000000 000.11111111111, still */
1964 muls.l r1,r4,r4 /* leaving at least one sign bit. */
79c2c2aa 1965 mulu.l r5,r3,r5
0c63e844 1966 mshalds.l r1,r21,r1
1967 shari r4,26,r4
79c2c2aa 1968 shlld r5,r0,r5
1969 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
0c63e844 1970 sub r2,r5,r2
1971 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1972
1973 shlri r2,22,r21
1974 mulu.l r21,r1,r21
1975 addi r20,30-22,r0
1976 /* bubble */ /* could test r3 here to check for divide by zero. */
1977 shlrd r21,r0,r21
1978 mulu.l r21,r3,r5
79c2c2aa 1979 mcmpgt.l r21,r63,r21 // See Note 1
0c63e844 1980 addi r20,30,r0
1981 mshfhi.l r63,r21,r21
1982 sub r2,r5,r2
1983 andc r2,r21,r2
1984
1985 /* small divisor: need a third divide step */
1986 mulu.l r2,r1,r7
1987 ptabs r18,tr0
1988 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1989 shlrd r7,r0,r7
1990 mulu.l r7,r3,r5
1991 /* bubble */
/* Select rest - r3 (r8) instead of rest when the compare result is
   nonzero, then subtract the last partial product.  */
1992 addi r8,1,r7
1993 cmpgt r7,r5,r7
1994 cmvne r7,r8,r2
1995 sub r2,r5,r2
1996 blink tr0,r63
1997
1998LOCAL(large_divisor):
1999 mmulfx.w r5,r4,r4
2000 shlrd r2,r9,r25
2001 shlri r25,32,r8
2002 msub.w r1,r4,r1
2003
2004 mulu.l r1,r7,r4
2005 addi r1,-3,r5
2006 mulu.l r5,r8,r5
79c2c2aa 2007 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
0c63e844 2008 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
2009 the case may be, %0000000000000000 000.11111111111, still */
2010 muls.l r1,r4,r4 /* leaving at least one sign bit. */
79c2c2aa 2011 shlri r5,14-1,r8
0c63e844 2012 mulu.l r8,r7,r5
2013 mshalds.l r1,r21,r1
2014 shari r4,26,r4
79c2c2aa 2015 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
0c63e844 2016 sub r25,r5,r25
2017 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
2018
2019 shlri r25,22,r21
2020 mulu.l r21,r1,r21
2021 pta LOCAL(no_lo_adj),tr0
2022 addi r22,32,r0
2023 shlri r21,40,r21
2024 mulu.l r21,r7,r5
2025 add r8,r21,r8
2026 shlld r2,r0,r2
2027 sub r25,r5,r25
/* If the rest is still >= r7, bump the quotient and subtract once more.  */
79c2c2aa 2028 bgtu/u r7,r25,tr0 // no_lo_adj
0c63e844 2029 addi r8,1,r8
79c2c2aa 2030 sub r25,r7,r25
0c63e844 2031LOCAL(no_lo_adj):
79c2c2aa 2032 mextr4 r2,r25,r2
0c63e844 2033
2034 /* large_divisor: only needs a few adjustments. */
2035 mulu.l r8,r6,r5
2036 ptabs r18,tr0
/* Select rest + r6 (r7) instead of rest when the partial product exceeds
   the rest, subtract the product, then shift right by the normalization
   count r22.  */
79c2c2aa 2037 add r2,r6,r7
0c63e844 2038 cmpgtu r5,r2,r8
2039 cmvne r8,r7,r2
2040 sub r2,r5,r2
79c2c2aa 2041 shlrd r2,r22,r2
0c63e844 2042 blink tr0,r63
619f47f5 2043 ENDFUNC(GLOBAL(umoddi3))
0c63e844 2044/* Note 1: To shift the result of the second divide stage so that the result
2045 always fits into 32 bits, yet we still reduce the rest sufficiently
2046 would require a lot of instructions to do the shifts just right. Using
2047 the full 64 bit shift result to multiply with the divisor would require
2048 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
808a491c 2049 Fortunately, if the upper 32 bits of the shift result are nonzero, we
0c63e844 2050 know that the rest after taking this partial result into account will
2051 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
808a491c 2052 upper 32 bits of the partial result are nonzero. */
0c63e844 2053#endif /* __SHMEDIA__ */
2054#endif /* L_umoddi3 */
2055
2056#ifdef L_moddi3
273fffd6 2057#if __SHMEDIA__
0c63e844 2058 .mode SHmedia
2059 .section .text..SHmedia32,"ax"
2060 .align 2
/* Signed 64 bit remainder for SHmedia: operands in r2 / r3, result in r2.
   Both operands are replaced by their absolute values and the work is done
   by umoddi3_internal.  The result takes the sign of the dividend: when
   the dividend is non-negative that routine is entered with a plain
   branch, so it returns straight to our caller; otherwise it is called and
   its result is negated.  */
2061 .global GLOBAL(moddi3)
619f47f5 2062 FUNC(GLOBAL(moddi3))
0c63e844 2063GLOBAL(moddi3):
59312820 2064 pta GLOBAL(umoddi3_internal),tr0
/* r22 / r23 = 0 or -1 according to the signs of r2 / r3.  */
0c63e844 2065 shari r2,63,r22
2066 shari r3,63,r23
/* (x ^ s) - s gives the absolute value when s is the 0 / -1 sign mask.  */
2067 xor r2,r22,r2
2068 xor r3,r23,r3
2069 sub r2,r22,r2
2070 sub r3,r23,r3
/* Dividend sign mask is zero: tail-branch with the caller's r18 intact.  */
2071 beq/u r22,r63,tr0
/* Negative dividend: keep our return address in tr1, call with the link
   written to r18, then negate the remainder.  */
2072 ptabs r18,tr1
2073 blink tr0,r18
2074 sub r63,r2,r2
2075 blink tr1,r63
619f47f5 2076 ENDFUNC(GLOBAL(moddi3))
0c63e844 2077#endif /* __SHMEDIA__ */
2078#endif /* L_moddi3 */
2079
d73f1571 2080#ifdef L_set_fpscr
7105fb72 2081#if !defined (__SH2A_NOFPU__)
2082#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
87e19636 2083#ifdef __SH5__
2084 .mode SHcompact
2085#endif
/* Load FPSCR from r4 and store two variants of that value into the two
   words of fpscr_values.  The halves of the value are swapped so that bits
   19 and 20 land in bits 3 and 4, where the 8 bit immediates #8 / #16 /
   #24 can reach them (0x80000 / 0x100000 are the bits labelled FPSCR.PR /
   FPSCR.SZ by the constants in udivsi3_i4).  One stored word has bit 19
   set (plus bit 20 when FMOVD_WORKS is defined); the other has both bits
   clear.  Which word receives which variant depends on the
   __SH4__ / __SH2A_DOUBLE__ test below.  */
16f1dae0 2086 .global GLOBAL(set_fpscr)
59312820 2087 HIDDEN_FUNC(GLOBAL(set_fpscr))
16f1dae0 2088GLOBAL(set_fpscr):
d73f1571 2089 lds r4,fpscr
/* Get the address of fpscr_values into r1: through the GOT for PIC (r12
   is saved and restored around its use), directly otherwise.  */
619f47f5 2090#ifdef __PIC__
2091 mov.l r12,@-r15
a9cfe83b 2092#ifdef __vxworks
2093 mov.l LOCAL(set_fpscr_L0_base),r12
2094 mov.l LOCAL(set_fpscr_L0_index),r0
2095 mov.l @r12,r12
2096 mov.l @(r0,r12),r12
2097#else
619f47f5 2098 mova LOCAL(set_fpscr_L0),r0
2099 mov.l LOCAL(set_fpscr_L0),r12
2100 add r0,r12
a9cfe83b 2101#endif
619f47f5 2102 mov.l LOCAL(set_fpscr_L1),r0
2103 mov.l @(r0,r12),r1
2104 mov.l @r15+,r12
2105#else
16f1dae0 2106 mov.l LOCAL(set_fpscr_L1),r1
619f47f5 2107#endif
/* First variant: set bits 3 and 4 of the swapped value, then clear bit 4
   again unless FMOVD_WORKS is defined.  */
d73f1571 2108 swap.w r4,r0
2109 or #24,r0
1b61190c 2110#ifndef FMOVD_WORKS
d73f1571 2111 xor #16,r0
1b61190c 2112#endif
7105fb72 2113#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1b61190c 2114 swap.w r0,r3
2115 mov.l r3,@(4,r1)
87ed74ef 2116#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
d73f1571 2117 swap.w r0,r2
2118 mov.l r2,@r1
1b61190c 2119#endif
/* Second variant: toggle off exactly the bits that were left set above.  */
2120#ifndef FMOVD_WORKS
d73f1571 2121 xor #8,r0
1b61190c 2122#else
2123 xor #24,r0
2124#endif
/* The final store is done in the rts delay slot.  */
7105fb72 2125#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1b61190c 2126 swap.w r0,r2
2127 rts
2128 mov.l r2,@r1
87ed74ef 2129#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
d73f1571 2130 swap.w r0,r3
2131 rts
2132 mov.l r3,@(4,r1)
1b61190c 2133#endif
/* Literal pool used by the address computation above.  */
d73f1571 2134 .align 2
619f47f5 2135#ifdef __PIC__
a9cfe83b 2136#ifdef __vxworks
2137LOCAL(set_fpscr_L0_base):
2138 .long ___GOTT_BASE__
2139LOCAL(set_fpscr_L0_index):
2140 .long ___GOTT_INDEX__
2141#else
619f47f5 2142LOCAL(set_fpscr_L0):
2143 .long _GLOBAL_OFFSET_TABLE_
a9cfe83b 2144#endif
619f47f5 2145LOCAL(set_fpscr_L1):
2146 .long GLOBAL(fpscr_values@GOT)
2147#else
16f1dae0 2148LOCAL(set_fpscr_L1):
2149 .long GLOBAL(fpscr_values)
619f47f5 2150#endif
805e22b2 2151
2152 ENDFUNC(GLOBAL(set_fpscr))
c03bb5e0 2153#ifndef NO_FPSCR_VALUES
d73f1571 2154#ifdef __ELF__
16f1dae0 2155 .comm GLOBAL(fpscr_values),8,4
d73f1571 2156#else
16f1dae0 2157 .comm GLOBAL(fpscr_values),8
d73f1571 2158#endif /* ELF */
c03bb5e0 2159#endif /* NO_FPSCR_VALUES */
87ed74ef 2160#endif /* SH2E / SH3E / SH4 */
7105fb72 2161#endif /* __SH2A_NOFPU__ */
d73f1571 2162#endif /* L_set_fpscr */
4e734737 2163#ifdef L_ic_invalidate
87e19636 2164#if __SH5__ == 32
2165 .mode SHmedia
2166 .section .text..SHmedia32,"ax"
2167 .align 2
/* Fill in a trampoline at the address in r0: a fixed 64 bit pattern
   (built in r20 in an endian-dependent order) is stored at offset 0, r2 at
   offset 8 and r3 at offset 12.  There is no return instruction before
   ENDFUNC, so execution appears to continue into ic_invalidate, which
   follows directly -- confirm this fall-through is intended.  */
e40c2d35 2168 .global GLOBAL(init_trampoline)
59312820 2169 HIDDEN_FUNC(GLOBAL(init_trampoline))
e40c2d35 2170GLOBAL(init_trampoline):
2171 st.l r0,8,r2
2172#ifdef __LITTLE_ENDIAN__
2173 movi 9,r20
2174 shori 0x402b,r20
2175 shori 0xd101,r20
2176 shori 0xd002,r20
2177#else
2178 movi 0xffffffffffffd002,r20
2179 shori 0xd101,r20
2180 shori 0x402b,r20
2181 shori 9,r20
2182#endif
2183 st.q r0,0,r20
2184 st.l r0,12,r3
59312820 2185 ENDFUNC(GLOBAL(init_trampoline))
/* SHmedia version: write back the data cache block at the address in r0,
   then invalidate the matching instruction cache block, with synco / synci
   ordering the two steps, and return through r18.  */
87e19636 2186 .global GLOBAL(ic_invalidate)
59312820 2187 HIDDEN_FUNC(GLOBAL(ic_invalidate))
87e19636 2188GLOBAL(ic_invalidate):
0c63e844 2189 ocbwb r0,0
2190 synco
87e19636 2191 icbi r0, 0
2192 ptabs r18, tr0
2193 synci
2194 blink tr0, r63
619f47f5 2195 ENDFUNC(GLOBAL(ic_invalidate))
9435e831 2196#elif defined(__SH4A__)
/* SH4A version: write back the data cache block at the address in r4,
   synchronize, then invalidate the matching instruction cache block.  */
2197 .global GLOBAL(ic_invalidate)
59312820 2198 HIDDEN_FUNC(GLOBAL(ic_invalidate))
9435e831 2199GLOBAL(ic_invalidate):
2200 ocbwb @r4
2201 synco
9435e831 2202 icbi @r4
85714674 2203 rts
2204 nop
9435e831 2205 ENDFUNC(GLOBAL(ic_invalidate))
59312820 2206#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2207 /* For system code, we use ic_invalidate_line_i, but user code
2208 needs a different mechanism. A kernel call is generally not
2209 available, and it would also be slow. Different SH4 variants use
2210 different sizes and associativities of the Icache. We use a small
2211 bit of dispatch code that can be put hidden in every shared object,
2212 which calls the actual processor-specific invalidation code in a
2213 separate module.
2214 Or if you have operating system support, the OS could mmap the
2215 processor-specific code from a single page, since it is highly
2216 repetitive. */
/* Dispatcher: r4 holds the address whose instruction cache line is to be
   invalidated.  The address of ic_invalidate_array is loaded into r1
   (through the GOT for PIC), the data cache block at r4 is written back,
   and control jumps into the array at
       array + ((r4 - array) & mask)
   where the mask is the word at offset 8 of the array.  The word at
   offset 4 of the array is loaded into r0 in the jmp delay slot.  Note
   that r4 is left holding (address - array) for the code in the array.  */
4e734737 2217 .global GLOBAL(ic_invalidate)
59312820 2218 HIDDEN_FUNC(GLOBAL(ic_invalidate))
4e734737 2219GLOBAL(ic_invalidate):
59312820 2220#ifdef __pic__
a9cfe83b 2221#ifdef __vxworks
2222 mov.l 1f,r1
2223 mov.l 2f,r0
2224 mov.l @r1,r1
2225 mov.l 0f,r2
2226 mov.l @(r0,r1),r0
2227#else
2228 mov.l 1f,r1
2229 mova 1f,r0
2230 mov.l 0f,r2
59312820 2231 add r1,r0
a9cfe83b 2232#endif
/* r0 = GOT base, r2 = GOT offset of ic_invalidate_array.  */
59312820 2233 mov.l @(r0,r2),r1
a9cfe83b 2234#else
2235 mov.l 0f,r1
59312820 2236#endif
2237 ocbwb @r4
2238 mov.l @(8,r1),r0
2239 sub r1,r4
2240 and r4,r0
2241 add r1,r0
2242 jmp @r0
2243 mov.l @(4,r1),r0
/* Literal pool for the address computation above.  */
a819eb1f 2244 .align 2
59312820 2245#ifndef __pic__
22460: .long GLOBAL(ic_invalidate_array)
2247#else /* __pic__ */
2248 .global GLOBAL(ic_invalidate_array)
a9cfe83b 22490: .long GLOBAL(ic_invalidate_array)@GOT
2250#ifdef __vxworks
22511: .long ___GOTT_BASE__
22522: .long ___GOTT_INDEX__
2253#else
22541: .long _GLOBAL_OFFSET_TABLE_
2255#endif
2257#endif /* __pic__ */
/* Close the function for both the PIC and the non-PIC variant, matching
   the unconditional HIDDEN_FUNC above.  */
59312820 2256 ENDFUNC(GLOBAL(ic_invalidate))
2258#endif /* SH4 */
2259#endif /* L_ic_invalidate */
2260
2261#ifdef L_ic_invalidate_array
3a6994f8 2262#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2263 .global GLOBAL(ic_invalidate_array)
59312820 2264 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
/* Layout contract with the SH4 ic_invalidate dispatcher: it reads its
   address mask from offset 8 of this array, jumps to
   array + ((addr - array) & mask), and arrives with r4 = addr - array.
   For SH4A the mask must be zero so that the jump always lands on the
   first instruction.  The zero word therefore has to sit at offset 8
   exactly, so the code branches around it: the full instruction sequence
   is longer than 8 bytes.  */
 .align 2
2266 FUNC(GLOBAL(ic_invalidate_array))
2267GLOBAL(ic_invalidate_array):
/* Offset 0: undo the dispatcher's 'sub r1,r4' to recover the address.  */
2268 add r1,r4
/* Offset 2 / 4: skip over the mask word (nop fills the delay slot).  */
 bra 1f
 nop
/* Offset 6: padding, never executed.  */
 nop
2273 .align 2
/* Offset 8: the mask word read by the dispatcher.  */
59312820 2274 .long 0
1:
2269 synco
59312820 2270 icbi @r4
85714674 2271 rts
2272 nop
2275 ENDFUNC(GLOBAL(ic_invalidate_array))
2276#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* SH4 invalidation array, entered from the ic_invalidate dispatcher, which
   reads the word at offset 4 into r0 and uses the word at offset 8 as its
   address mask.  Every .rept iteration below emits one 32 byte entry; in
   the WAYS <= 6 layout the first entry thus provides WAY_SIZE + 8 at
   offset 4 and WAY_SIZE - 32 at offset 8.  WAYS and WAY_SIZE default to 4
   and 0x4000 unless WAYS is already defined.  */
2277 .global GLOBAL(ic_invalidate_array)
8110cc37 2278 .p2align 5
59312820 2279 FUNC(GLOBAL(ic_invalidate_array))
8110cc37 2280/* This must be aligned to the beginning of a cache line. */
59312820 2281GLOBAL(ic_invalidate_array):
2282#ifndef WAYS
2283#define WAYS 4
2284#define WAY_SIZE 0x4000
2285#endif
2286#if WAYS == 1
/* Entry: rts + nop (4 bytes) followed by seven words of WAY_SIZE - 32.  */
2287 .rept WAY_SIZE * WAYS / 32
2288 rts
2289 nop
2290 .rept 7
2291 .long WAY_SIZE - 32
2292 .endr
2293 .endr
2294#elif WAYS <= 6
/* Entry: braf r0 with 'add #-8,r0' in its delay slot, the two data words,
   then WAYS-2 further 'braf r0; nop' pairs and 7-WAYS 'rts; nop' pairs.  */
2295 .rept WAY_SIZE * WAYS / 32
2296 braf r0
2297 add #-8,r0
2298 .long WAY_SIZE + 8
2299 .long WAY_SIZE - 32
2300 .rept WAYS-2
2301 braf r0
2302 nop
2303 .endr
2304 .rept 7 - WAYS
2305 rts
2306 nop
2307 .endr
2308 .endr
2309#else /* WAYS > 6 */
2310 /* This variant needs two different pages for mmap-ing. */
/* WAYS-1 ways of 'braf r0; nop' entries, followed by one final way whose
   entries are an rts padded with nops to 32 bytes.  */
2311 .rept WAYS-1
2312 .rept WAY_SIZE / 32
2313 braf r0
2314 nop
2315 .long WAY_SIZE
2316 .rept 6
2317 .long WAY_SIZE - 32
2318 .endr
2319 .endr
2320 .endr
2321 .rept WAY_SIZE / 32
4e734737 2322 rts
8110cc37 2323 .rept 15
4e734737 2324 nop
2325 .endr
8110cc37 2326 .endr
59312820 2327#endif /* WAYS */
2328 ENDFUNC(GLOBAL(ic_invalidate_array))
4e734737 2329#endif /* SH4 */
59312820 2330#endif /* L_ic_invalidate_array */
87e19636 2331
2332#if defined (__SH5__) && __SH5__ == 32
2333#ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline.
   33 signed 16-bit entries, each the distance from ct_main_label to one
   handler.  The trampoline indexes it with 2 * nsb(cookie) from a base of
   (ct_main_table - 31 * 2), so entry k is taken when the highest set bit
   of the 32-bit cookie is bit 31-k, and the final entry when the cookie
   is zero.  The bit masks cleared by the handlers agree with this
   (e.g. ct_r2_pop, entry 2, clears bit 29).  */
2334 .section .rodata
2335 .align 1
2336LOCAL(ct_main_table):
/* Cookie bits 31..20: three entries (fp, ld, pop) each for r2..r5.  */
2337.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2338.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2339.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2340.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2341.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2342.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2343.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2344.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2345.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2346.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2347.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2348.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
/* Cookie bits 19..4: four entries (fph, fpl, ld, pop) each for r6..r9.  */
2349.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2350.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2351.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2352.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2353.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2354.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2355.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2356.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2357.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2358.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2359.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2360.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2361.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2362.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2363.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2364.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
/* Cookie bits 3..1: pop-sequence count field (ct_pop_seq masks 7 << 1).  */
2365.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2366.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2367.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
/* Cookie bit 0 set: 64-bit return value expected.  Cookie zero: plain call.  */
2368.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2369.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
/* Entry point and dispatch loop of GCC_shcompact_call_trampoline.
   Register roles set up here and relied on by every handler:
     tr0 = function to call (taken from r0 on entry)
     tr1 = ct_loop, the address handlers branch back to
     r0  = ct_main_table - 31 * 2 (biased table base)
     r1  = cookie, zero-extended to 64 bits
   Scratch: r28, r29, r30, tr2.  */
2370 .mode SHmedia
2371 .section .text..SHmedia32, "ax"
2372 .align 2
2373
2374 /* This function loads 64-bit general-purpose registers from the
2375 stack, from a memory address contained in them or from an FP
2376 register, according to a cookie passed in r1. Its execution
2377 time is linear on the number of registers that actually have
2378 to be copied. See sh.h for details on the actual bit pattern.
2379
2380 The function to be called is passed in r0. If a 32-bit return
2381 value is expected, the actual function will be tail-called,
2382 otherwise the return address will be stored in r10 (that the
2383 caller should expect to be clobbered) and the return value
2384 will be expanded into r2/r3 upon return. */
2385
2386 .global GLOBAL(GCC_shcompact_call_trampoline)
805e22b2 2387 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
87e19636 2388GLOBAL(GCC_shcompact_call_trampoline):
2389 ptabs/l r0, tr0 /* Prepare to call the actual function. */
/* movi/shori pair: build the 32-bit biased table address in r0, upper half
   first, then the lower half shifted in.  */
2390 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2391 pt/l LOCAL(ct_loop), tr1
/* Zero-extend the cookie so that nsb below only sees its low 32 bits.  */
2392 addz.l r1, r63, r1
2393 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2394LOCAL(ct_loop):
/* r28 = nsb(cookie); r29 = byte offset of the 16-bit table entry;
   r30 = handler offset relative to ct_main_label.  */
2395 nsb r1, r28
2396 shlli r28, 1, r29
2397 ldx.w r0, r29, r30
2398LOCAL(ct_main_label):
/* Branch to ct_main_label + r30; handlers clear their cookie bits and
   return to ct_loop through tr1.  */
2399 ptrel/l r30, tr2
2400 blink tr2, r63
/* FP-copy handlers of GCC_shcompact_call_trampoline.
   On entry to each: r1 = remaining cookie, tr1 = dispatch loop.
   Each handler moves one double FP register into a general register with
   fmov.dq, clears its own field in r1 with andc, and branches to tr1.
   Where several FP sources are possible, the handler jumps into a small
   table (ct_rN_fp_copy) of 8-byte entries (fmov.dq + blink); the entry is
   selected by a 2-bit cookie field pre-multiplied by 8
   (shift by position - 3, then mask with 3 << 3).
   Scratch: r30..r34, tr2.  */
2401LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2402 /* It must be dr0, so just do it. */
2403 fmov.dq dr0, r2
/* Clear cookie bits 29..31 (7 << 29).  */
2404 movi 7, r30
2405 shlli r30, 29, r31
2406 andc r1, r31, r1
2407 blink tr1, r63
2408LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2409 /* It is either dr0 or dr2. */
/* r32 samples the cookie field (r1 >> 26) before bits 26..28 are cleared;
   a value of 4 keeps the dr0 copy, anything else takes dr2.  */
2410 movi 7, r30
2411 shlri r1, 26, r32
2412 shlli r30, 26, r31
2413 andc r1, r31, r1
2414 fmov.dq dr0, r3
2415 beqi/l r32, 4, tr1
2416 fmov.dq dr2, r3
2417 blink tr1, r63
2418LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
/* r33 = (2-bit field at bits 20..21) * 8, the byte offset into
   ct_r4_fp_copy; r32 makes it relative to ct_r4_fp_base for ptrel.  */
2419 shlri r1, 23 - 3, r34
2420 andi r34, 3 << 3, r33
2421 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2422LOCAL(ct_r4_fp_base):
2423 ptrel/l r32, tr2
/* Clear cookie bits 23..25 (7 << 23), then jump to the selected entry.  */
2424 movi 7, r30
2425 shlli r30, 23, r31
2426 andc r1, r31, r1
2427 blink tr2, r63
2428LOCAL(ct_r4_fp_copy):
/* Three entries only: dr0, dr2, dr4.  */
2429 fmov.dq dr0, r4
2430 blink tr1, r63
2431 fmov.dq dr2, r4
2432 blink tr1, r63
2433 fmov.dq dr4, r4
2434 blink tr1, r63
2435LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
/* Same scheme as ct_r4_fp with the selector at bits 17..18 and the
   cleared field at bits 20..22.  */
2436 shlri r1, 20 - 3, r34
2437 andi r34, 3 << 3, r33
2438 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2439LOCAL(ct_r5_fp_base):
2440 ptrel/l r32, tr2
2441 movi 7, r30
2442 shlli r30, 20, r31
2443 andc r1, r31, r1
2444 blink tr2, r63
2445LOCAL(ct_r5_fp_copy):
/* Four entries: dr0, dr2, dr4, dr6.  */
2446 fmov.dq dr0, r5
2447 blink tr1, r63
2448 fmov.dq dr2, r5
2449 blink tr1, r63
2450 fmov.dq dr4, r5
2451 blink tr1, r63
2452 fmov.dq dr6, r5
2453 blink tr1, r63
2454LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2455 /* It must be dr8. */
2456 fmov.dq dr8, r6
/* Clear the whole 4-bit r6 field, bits 16..19 (15 << 16).  */
2457 movi 15, r30
2458 shlli r30, 16, r31
2459 andc r1, r31, r1
2460 blink tr1, r63
2461LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
/* Selector at bits 13..14; clears bits 16..18 (7 << 16).  */
2462 shlri r1, 16 - 3, r34
2463 andi r34, 3 << 3, r33
2464 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2465LOCAL(ct_r6_fp_base):
2466 ptrel/l r32, tr2
2467 movi 7, r30
2468 shlli r30, 16, r31
2469 andc r1, r31, r1
2470 blink tr2, r63
2471LOCAL(ct_r6_fp_copy):
2472 fmov.dq dr0, r6
2473 blink tr1, r63
2474 fmov.dq dr2, r6
2475 blink tr1, r63
2476 fmov.dq dr4, r6
2477 blink tr1, r63
2478 fmov.dq dr6, r6
2479 blink tr1, r63
2480LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2481 /* It is either dr8 or dr10. */
/* r32 = r1 >> 12 is sampled before the 4-bit field (15 << 12) is cleared;
   a value of 8 keeps the dr8 copy, anything else takes dr10.  */
2482 movi 15 << 12, r31
2483 shlri r1, 12, r32
2484 andc r1, r31, r1
2485 fmov.dq dr8, r7
2486 beqi/l r32, 8, tr1
2487 fmov.dq dr10, r7
2488 blink tr1, r63
2489LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
/* Selector at bits 9..10; clears bits 12..14 (7 << 12).  */
2490 shlri r1, 12 - 3, r34
2491 andi r34, 3 << 3, r33
2492 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2493LOCAL(ct_r7_fp_base):
2494 ptrel/l r32, tr2
2495 movi 7 << 12, r31
2496 andc r1, r31, r1
2497 blink tr2, r63
2498LOCAL(ct_r7_fp_copy):
2499 fmov.dq dr0, r7
2500 blink tr1, r63
2501 fmov.dq dr2, r7
2502 blink tr1, r63
2503 fmov.dq dr4, r7
2504 blink tr1, r63
2505 fmov.dq dr6, r7
2506 blink tr1, r63
2507LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2508 /* It is either dr8 or dr10. */
/* r32 = cookie bit 8, sampled before the field (15 << 8) is cleared;
   zero keeps the dr8 copy, non-zero takes dr10.  */
2509 movi 15 << 8, r31
2510 andi r1, 1 << 8, r32
2511 andc r1, r31, r1
2512 fmov.dq dr8, r8
2513 beq/l r32, r63, tr1
2514 fmov.dq dr10, r8
2515 blink tr1, r63
2516LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
/* Selector at bits 5..6; clears bits 8..10 (7 << 8).  */
2517 shlri r1, 8 - 3, r34
2518 andi r34, 3 << 3, r33
2519 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2520LOCAL(ct_r8_fp_base):
2521 ptrel/l r32, tr2
2522 movi 7 << 8, r31
2523 andc r1, r31, r1
2524 blink tr2, r63
2525LOCAL(ct_r8_fp_copy):
2526 fmov.dq dr0, r8
2527 blink tr1, r63
2528 fmov.dq dr2, r8
2529 blink tr1, r63
2530 fmov.dq dr4, r8
2531 blink tr1, r63
2532 fmov.dq dr6, r8
2533 blink tr1, r63
2534LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2535 /* It is either dr8 or dr10. */
/* r32 = cookie bit 4, sampled before the field (15 << 4) is cleared;
   zero keeps the dr8 copy, non-zero takes dr10.  */
2536 movi 15 << 4, r31
2537 andi r1, 1 << 4, r32
2538 andc r1, r31, r1
2539 fmov.dq dr8, r9
2540 beq/l r32, r63, tr1
2541 fmov.dq dr10, r9
2542 blink tr1, r63
2543LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
/* Selector at bits 1..2; clears bits 4..6 (7 << 4).  */
2544 shlri r1, 4 - 3, r34
2545 andi r34, 3 << 3, r33
2546 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2547LOCAL(ct_r9_fp_base):
2548 ptrel/l r32, tr2
2549 movi 7 << 4, r31
2550 andc r1, r31, r1
2551 blink tr2, r63
2552LOCAL(ct_r9_fp_copy):
2553 fmov.dq dr0, r9
2554 blink tr1, r63
2555 fmov.dq dr2, r9
2556 blink tr1, r63
2557 fmov.dq dr4, r9
2558 blink tr1, r63
2559 fmov.dq dr6, r9
2560 blink tr1, r63
/* Memory-load handlers of GCC_shcompact_call_trampoline.
   On entry: r1 = remaining cookie, tr1 = dispatch loop.
   ct_rN_ld (N = 2..8) builds a 2-bit mask at the register's cookie
   position, keeps the masked bits in r32 and clears them in r1.
     - Both bits set: branch to ct_rN_load, which replaces rN by the
       64-bit value at the address held in rN and returns to the loop.
     - Otherwise: r(N+1) = rN + 8, rN is replaced by the 64-bit value it
       pointed to, and execution falls through to the handler of the next
       register, so a run of consecutive registers is loaded from
       consecutive 8-byte slots.
   Scratch: r30, r31, r32, tr2.  */
2561LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2562 pt/l LOCAL(ct_r2_load), tr2
/* Mask = 3 << 29.  */
2563 movi 3, r30
2564 shlli r30, 29, r31
2565 and r1, r31, r32
2566 andc r1, r31, r1
2567 beq/l r31, r32, tr2
2568 addi.l r2, 8, r3
2569 ldx.q r2, r63, r2
2570 /* Fall through. */
2571LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2572 pt/l LOCAL(ct_r3_load), tr2
/* Mask = 3 << 26.  */
2573 movi 3, r30
2574 shlli r30, 26, r31
2575 and r1, r31, r32
2576 andc r1, r31, r1
2577 beq/l r31, r32, tr2
2578 addi.l r3, 8, r4
2579 ldx.q r3, r63, r3
/* Falls through.  */
2580LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2581 pt/l LOCAL(ct_r4_load), tr2
/* Mask = 3 << 23.  */
2582 movi 3, r30
2583 shlli r30, 23, r31
2584 and r1, r31, r32
2585 andc r1, r31, r1
2586 beq/l r31, r32, tr2
2587 addi.l r4, 8, r5
2588 ldx.q r4, r63, r4
/* Falls through.  */
2589LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2590 pt/l LOCAL(ct_r5_load), tr2
/* Mask = 3 << 20.  */
2591 movi 3, r30
2592 shlli r30, 20, r31
2593 and r1, r31, r32
2594 andc r1, r31, r1
2595 beq/l r31, r32, tr2
2596 addi.l r5, 8, r6
2597 ldx.q r5, r63, r5
/* Falls through.  */
2598LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2599 pt/l LOCAL(ct_r6_load), tr2
2600 movi 3 << 16, r31
2601 and r1, r31, r32
2602 andc r1, r31, r1
2603 beq/l r31, r32, tr2
2604 addi.l r6, 8, r7
2605 ldx.q r6, r63, r6
/* Falls through.  */
2606LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2607 pt/l LOCAL(ct_r7_load), tr2
2608 movi 3 << 12, r31
2609 and r1, r31, r32
2610 andc r1, r31, r1
2611 beq/l r31, r32, tr2
2612 addi.l r7, 8, r8
2613 ldx.q r7, r63, r7
/* Falls through.  */
2614LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2615 pt/l LOCAL(ct_r8_load), tr2
2616 movi 3 << 8, r31
2617 and r1, r31, r32
2618 andc r1, r31, r1
2619 beq/l r31, r32, tr2
2620 addi.l r8, 8, r9
2621 ldx.q r8, r63, r8
/* Falls through.  */
2622LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
/* r9 is the last register: load it and go straight to ct_check_tramp
   instead of returning to the dispatch loop; no cookie bits are cleared.  */
2623 pt/l LOCAL(ct_check_tramp), tr2
2624 ldx.q r9, r63, r9
2625 blink tr2, r63
/* Single-register loads: dereference rN in place and resume dispatching.  */
2626LOCAL(ct_r2_load):
2627 ldx.q r2, r63, r2
2628 blink tr1, r63
2629LOCAL(ct_r3_load):
2630 ldx.q r3, r63, r3
2631 blink tr1, r63
2632LOCAL(ct_r4_load):
2633 ldx.q r4, r63, r4
2634 blink tr1, r63
2635LOCAL(ct_r5_load):
2636 ldx.q r5, r63, r5
2637 blink tr1, r63
2638LOCAL(ct_r6_load):
2639 ldx.q r6, r63, r6
2640 blink tr1, r63
2641LOCAL(ct_r7_load):
2642 ldx.q r7, r63, r7
2643 blink tr1, r63
2644LOCAL(ct_r8_load):
2645 ldx.q r8, r63, r8
2646 blink tr1, r63
/* Stack-pop handlers and tail of GCC_shcompact_call_trampoline.
   On entry: r1 = remaining cookie, r15 = stack pointer, tr0 = function to
   call, tr1 = dispatch loop.
   ct_rN_pop (N = 2..8) loads rN from the 8-byte slot at r15, advances r15
   by 8, clears the register's single cookie bit, and returns to the loop.
   Scratch: r30..r33, tr2.  */
2647LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
/* Clears bit 29.  */
2648 movi 1, r30
2649 ldx.q r15, r63, r2
2650 shlli r30, 29, r31
2651 addi.l r15, 8, r15
2652 andc r1, r31, r1
2653 blink tr1, r63
2654LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
/* Clears bit 26.  */
2655 movi 1, r30
2656 ldx.q r15, r63, r3
2657 shlli r30, 26, r31
2658 addi.l r15, 8, r15
2659 andc r1, r31, r1
2660 blink tr1, r63
2661LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
/* Clears bit 23.  */
2662 movi 1, r30
2663 ldx.q r15, r63, r4
2664 shlli r30, 23, r31
2665 addi.l r15, 8, r15
2666 andc r1, r31, r1
2667 blink tr1, r63
2668LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
/* Clears bit 20.  */
2669 movi 1, r30
2670 ldx.q r15, r63, r5
2671 shlli r30, 20, r31
2672 addi.l r15, 8, r15
2673 andc r1, r31, r1
2674 blink tr1, r63
2675LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
/* Clears bit 16.  */
2676 movi 1, r30
2677 ldx.q r15, r63, r6
2678 shlli r30, 16, r31
2679 addi.l r15, 8, r15
2680 andc r1, r31, r1
2681 blink tr1, r63
2682LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2683 ldx.q r15, r63, r7
2684 movi 1 << 12, r31
2685 addi.l r15, 8, r15
2686 andc r1, r31, r1
2687 blink tr1, r63
2688LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2689 ldx.q r15, r63, r8
2690 movi 1 << 8, r31
2691 addi.l r15, 8, r15
2692 andc r1, r31, r1
2693 blink tr1, r63
2694LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* r30 = cookie & (7 << 1), i.e. 2 * count; r31 = 8 * count.  Every pop in
   the sequence below is two 4-byte instructions, so jumping to
   ct_end_of_pop_seq - 8 * count executes exactly the last `count' pops
   (ending with r9) and then continues at ct_check_tramp.  The absolute
   address of ct_end_of_pop_seq is assembled with movi/shori.  */
2695 andi r1, 7 << 1, r30
2696 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2697 shlli r30, 2, r31
2698 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2699 sub.l r32, r31, r33
2700 ptabs/l r33, tr2
2701 blink tr2, r63
2702LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
/* Do not change the instruction count of the pops below without updating
   the multiplier in ct_pop_seq.  */
2703 ldx.q r15, r63, r3
2704 addi.l r15, 8, r15
2705 ldx.q r15, r63, r4
2706 addi.l r15, 8, r15
2707 ldx.q r15, r63, r5
2708 addi.l r15, 8, r15
2709 ldx.q r15, r63, r6
2710 addi.l r15, 8, r15
2711 ldx.q r15, r63, r7
2712 addi.l r15, 8, r15
2713 ldx.q r15, r63, r8
2714 addi.l r15, 8, r15
2715LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
/* Also the dispatch-table target for r9; falls through to ct_check_tramp
   without returning to the loop.  */
2716 ldx.q r15, r63, r9
2717 addi.l r15, 8, r15
2718LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2719LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
/* Only cookie bit 0 matters here: set means a 64-bit return value must be
   unpacked after the call (ct_ret_wide), clear means a plain tail call.  */
2720 pt/u LOCAL(ct_ret_wide), tr2
2721 andi r1, 1, r1
2722 bne/u r1, r63, tr2
2723LOCAL(ct_call_func): /* Just branch to the function. */
2724 blink tr0, r63
2725LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2726 64-bit return value. */
/* Save the caller's return address (r18) in r10, call through tr0 with the
   link written to r18, then set up tr0 to return to the saved address.  */
2727 add.l r18, r63, r10
2728 blink tr0, r18
2729 ptabs r10, tr0
/* Split the 64-bit result in r2 into two 32-bit halves in r2 and r3; which
   register receives the upper half depends on endianness.  */
2730#if __LITTLE_ENDIAN__
2731 shari r2, 32, r3
2732 add.l r2, r63, r2
2733#else
2734 add.l r2, r63, r3
2735 shari r2, 32, r2
2736#endif
2737 blink tr0, r63
805e22b2 2738
2739 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
87e19636 2740#endif /* L_shcompact_call_trampoline */
2741
2742#ifdef L_shcompact_return_trampoline
2743 /* This function does the converse of the code in `ret_wide'
2744 above. It is tail-called by SHcompact functions returning
2745 64-bit non-floating-point values, to pack the 32-bit values in
2746 r2 and r3 into r2. */
/* In:  r2, r3 = the two 32-bit halves; r18 = return address.
   Out: r2 = packed 64-bit value.  Modifies r3 and tr0.  */
2747
2748 .mode SHmedia
2749 .section .text..SHmedia32, "ax"
2750 .align 2
2751 .global GLOBAL(GCC_shcompact_return_trampoline)
59312820 2752 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
87e19636 2753GLOBAL(GCC_shcompact_return_trampoline):
2754 ptabs/l r18, tr0
/* Zero-extend the half that becomes the low 32 bits and shift the other
   half into the upper 32 bits; the two are OR-ed together below.  */
2755#if __LITTLE_ENDIAN__
2756 addz.l r2, r63, r2
2757 shlli r3, 32, r3
2758#else
2759 addz.l r3, r63, r3
2760 shlli r2, 32, r2
2761#endif
2762 or r3, r2, r2
2763 blink tr0, r63
805e22b2 2764
2765 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
87e19636 2766#endif /* L_shcompact_return_trampoline */
2767
2768#ifdef L_shcompact_incoming_args
/* Dispatch table for GCC_shcompact_incoming_args.
   33 signed 16-bit entries, each the distance from ia_main_label to one
   handler.  It is indexed with 2 * nsb(cookie) from a base of
   (ia_main_table - 31 * 2), so entry k corresponds to the highest set
   cookie bit being bit 31-k, and the final entry to a zero cookie.
   Positions that have no handler hold the value 1.  */
2769 .section .rodata
2770 .align 1
2771LOCAL(ia_main_table):
2772.word 1 /* Invalid, just loop */
2773.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2774.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2775.word 1 /* Invalid, just loop */
2776.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2777.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2778.word 1 /* Invalid, just loop */
2779.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2780.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2781.word 1 /* Invalid, just loop */
2782.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2783.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2784.word 1 /* Invalid, just loop */
2785.word 1 /* Invalid, just loop */
2786.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2787.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2788.word 1 /* Invalid, just loop */
2789.word 1 /* Invalid, just loop */
2790.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2791.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2792.word 1 /* Invalid, just loop */
2793.word 1 /* Invalid, just loop */
2794.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2795.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2796.word 1 /* Invalid, just loop */
2797.word 1 /* Invalid, just loop */
2798.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2799.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Cookie bits 3..1: push-sequence count field (ia_push_seq masks 7 << 1).  */
2800.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2801.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2802.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Cookie bit 0 only, or cookie zero: nothing left to do.  */
2803.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2804.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
/* Entry point and dispatch loop of GCC_shcompact_incoming_args.
   Register roles set up here and relied on by every handler:
     tr0 = return address (from r18)
     tr1 = ia_loop, the address handlers branch back to
     r0  = cookie (upper 32 bits of the incoming r17)
     r17 = store address (lower 32 bits of the incoming r17, sign-extended)
     r43 = ia_main_table - 31 * 2 (biased table base)
   Scratch: r36, r37, r38, tr2.  */
2805 .mode SHmedia
2806 .section .text..SHmedia32, "ax"
2807 .align 2
2808
2809 /* This function stores 64-bit general-purpose registers back in
101d4704 2810 the stack, and loads the address in which each register
2811 was stored into itself. The lower 32 bits of r17 hold the address
2812 to begin storing, and the upper 32 bits of r17 hold the cookie.
2813 Its execution time is linear on the
87e19636 2814 number of registers that actually have to be copied, and it is
2815 optimized for structures larger than 64 bits, as opposed to
0924aa1d 2816 individual `long long' arguments. See sh.h for details on the
87e19636 2817 actual bit pattern. */
2818
2819 .global GLOBAL(GCC_shcompact_incoming_args)
59312820 2820 FUNC(GLOBAL(GCC_shcompact_incoming_args))
87e19636 2821GLOBAL(GCC_shcompact_incoming_args):
2822 ptabs/l r18, tr0 /* Prepare to return. */
2823 shlri r17, 32, r0 /* Load the cookie. */
/* movi/shori pair: build the 32-bit biased table address in r43.  */
101d4704 2824 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
87e19636 2825 pt/l LOCAL(ia_loop), tr1
/* Keep only the low 32 bits of r17 (the store address).  */
2826 add.l r17, r63, r17
101d4704 2827 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
87e19636 2828LOCAL(ia_loop):
/* r36 = nsb(cookie); r37 = byte offset of the 16-bit table entry;
   r38 = handler offset relative to ia_main_label.  */
101d4704 2829 nsb r0, r36
2830 shlli r36, 1, r37
2831 ldx.w r43, r37, r38
87e19636 2832LOCAL(ia_main_label):
/* Branch to ia_main_label + r38.  */
101d4704 2833 ptrel/l r38, tr2
87e19636 2834 blink tr2, r63
/* "Store and load address" handlers of GCC_shcompact_incoming_args.
   On entry: r0 = remaining cookie, r17 = current store address,
   tr0 = return address, tr1 = dispatch loop.
   ia_rN_ld (N = 2..8) builds a 2-bit mask at the register's cookie
   position, keeps the masked bits in r40 and clears them in r0.  It then
   stores rN at r17, replaces rN by that address, and advances r17 by 8.
   If both mask bits were set it returns to the loop; otherwise it falls
   through to the handler of the next register.
   Scratch: r38, r39, r40.  */
2835LOCAL(ia_r2_ld): /* Store r2 and load its address. */
/* Mask = 3 << 29.  */
101d4704 2836 movi 3, r38
2837 shlli r38, 29, r39
2838 and r0, r39, r40
2839 andc r0, r39, r0
87e19636 2840 stx.q r17, r63, r2
2841 add.l r17, r63, r2
2842 addi.l r17, 8, r17
101d4704 2843 beq/u r39, r40, tr1
87e19636 2844LOCAL(ia_r3_ld): /* Store r3 and load its address. */
/* Mask = 3 << 26.  */
101d4704 2845 movi 3, r38
2846 shlli r38, 26, r39
2847 and r0, r39, r40
2848 andc r0, r39, r0
87e19636 2849 stx.q r17, r63, r3
2850 add.l r17, r63, r3
2851 addi.l r17, 8, r17
101d4704 2852 beq/u r39, r40, tr1
87e19636 2853LOCAL(ia_r4_ld): /* Store r4 and load its address. */
/* Mask = 3 << 23.  */
101d4704 2854 movi 3, r38
2855 shlli r38, 23, r39
2856 and r0, r39, r40
2857 andc r0, r39, r0
87e19636 2858 stx.q r17, r63, r4
2859 add.l r17, r63, r4
2860 addi.l r17, 8, r17
101d4704 2861 beq/u r39, r40, tr1
87e19636 2862LOCAL(ia_r5_ld): /* Store r5 and load its address. */
/* Mask = 3 << 20.  */
101d4704 2863 movi 3, r38
2864 shlli r38, 20, r39
2865 and r0, r39, r40
2866 andc r0, r39, r0
87e19636 2867 stx.q r17, r63, r5
2868 add.l r17, r63, r5
2869 addi.l r17, 8, r17
101d4704 2870 beq/u r39, r40, tr1
87e19636 2871LOCAL(ia_r6_ld): /* Store r6 and load its address. */
/* Mask = 3 << 16.  */
101d4704 2872 movi 3, r38
2873 shlli r38, 16, r39
2874 and r0, r39, r40
2875 andc r0, r39, r0
87e19636 2876 stx.q r17, r63, r6
2877 add.l r17, r63, r6
2878 addi.l r17, 8, r17
101d4704 2879 beq/u r39, r40, tr1
87e19636 2880LOCAL(ia_r7_ld): /* Store r7 and load its address. */
101d4704 2881 movi 3 << 12, r39
2882 and r0, r39, r40
2883 andc r0, r39, r0
87e19636 2884 stx.q r17, r63, r7
2885 add.l r17, r63, r7
2886 addi.l r17, 8, r17
101d4704 2887 beq/u r39, r40, tr1
87e19636 2888LOCAL(ia_r8_ld): /* Store r8 and load its address. */
101d4704 2889 movi 3 << 8, r39
2890 and r0, r39, r40
2891 andc r0, r39, r0
87e19636 2892 stx.q r17, r63, r8
2893 add.l r17, r63, r8
2894 addi.l r17, 8, r17
101d4704 2895 beq/u r39, r40, tr1
87e19636 2896LOCAL(ia_r9_ld): /* Store r9 and load its address. */
/* r9 is the last register: store it, replace it by its address, and return
   to the caller through tr0 without touching the cookie or r17.  */
2897 stx.q r17, r63, r9
2898 add.l r17, r63, r9
2899 blink tr0, r63
/* Push handlers and tail of GCC_shcompact_incoming_args.
   On entry: r0 = remaining cookie, r17 = current store address,
   tr0 = return address, tr1 = dispatch loop.
   ia_rN_push (N = 2..8) clears the register's single cookie bit, stores rN
   at r17, advances r17 by 8, and returns to the loop.  Unlike the ia_rN_ld
   handlers, the register itself is left unchanged.
   Scratch: r38..r41, tr2.  */
2900LOCAL(ia_r2_push): /* Push r2 onto the stack. */
/* Clears bit 29.  */
101d4704 2901 movi 1, r38
2902 shlli r38, 29, r39
2903 andc r0, r39, r0
87e19636 2904 stx.q r17, r63, r2
2905 addi.l r17, 8, r17
2906 blink tr1, r63
2907LOCAL(ia_r3_push): /* Push r3 onto the stack. */
/* Clears bit 26.  */
101d4704 2908 movi 1, r38
2909 shlli r38, 26, r39
2910 andc r0, r39, r0
87e19636 2911 stx.q r17, r63, r3
2912 addi.l r17, 8, r17
2913 blink tr1, r63
2914LOCAL(ia_r4_push): /* Push r4 onto the stack. */
/* Clears bit 23.  */
101d4704 2915 movi 1, r38
2916 shlli r38, 23, r39
2917 andc r0, r39, r0
87e19636 2918 stx.q r17, r63, r4
2919 addi.l r17, 8, r17
2920 blink tr1, r63
2921LOCAL(ia_r5_push): /* Push r5 onto the stack. */
/* Clears bit 20.  */
101d4704 2922 movi 1, r38
2923 shlli r38, 20, r39
2924 andc r0, r39, r0
87e19636 2925 stx.q r17, r63, r5
2926 addi.l r17, 8, r17
2927 blink tr1, r63
2928LOCAL(ia_r6_push): /* Push r6 onto the stack. */
/* Clears bit 16.  */
101d4704 2929 movi 1, r38
2930 shlli r38, 16, r39
2931 andc r0, r39, r0
87e19636 2932 stx.q r17, r63, r6
2933 addi.l r17, 8, r17
2934 blink tr1, r63
2935LOCAL(ia_r7_push): /* Push r7 onto the stack. */
101d4704 2936 movi 1 << 12, r39
2937 andc r0, r39, r0
87e19636 2938 stx.q r17, r63, r7
2939 addi.l r17, 8, r17
2940 blink tr1, r63
2941LOCAL(ia_r8_push): /* Push r8 onto the stack. */
101d4704 2942 movi 1 << 8, r39
2943 andc r0, r39, r0
87e19636 2944 stx.q r17, r63, r8
2945 addi.l r17, 8, r17
2946 blink tr1, r63
2947LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r38 = cookie & (7 << 1), i.e. 2 * count; r39 = 8 * count.  Every step of
   the sequence below is two 4-byte instructions (the last step being the
   r9 store plus the returning blink), so jumping to
   ia_end_of_push_seq - 8 * count executes exactly the last `count' stores
   and then returns.  The absolute address of ia_end_of_push_seq is
   assembled with movi/shori.  */
101d4704 2948 andi r0, 7 << 1, r38
2949 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2950 shlli r38, 2, r39
2951 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2952 sub.l r40, r39, r41
2953 ptabs/l r41, tr2
87e19636 2954 blink tr2, r63
/* NOTE(review): the label name below looks like a typo for
   ia_start_of_push_seq (compare ct_start_of_pop_seq); nothing in the
   visible code references it.  */
2955LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
/* Do not change the instruction count of the stores below without updating
   the multiplier in ia_push_seq.  */
2956 stx.q r17, r63, r3
2957 addi.l r17, 8, r17
2958 stx.q r17, r63, r4
2959 addi.l r17, 8, r17
2960 stx.q r17, r63, r5
2961 addi.l r17, 8, r17
2962 stx.q r17, r63, r6
2963 addi.l r17, 8, r17
2964 stx.q r17, r63, r7
2965 addi.l r17, 8, r17
2966 stx.q r17, r63, r8
2967 addi.l r17, 8, r17
2968LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2969 stx.q r17, r63, r9
2970LOCAL(ia_return): /* Return. */
2971 blink tr0, r63
2972LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
805e22b2 2973 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
87e19636 2974#endif /* L_shcompact_incoming_args */
2975#endif
2976#if __SH5__
2977#ifdef L_nested_trampoline
/* GCC_nested_trampoline: six SHmedia instructions (24 bytes) that read two
   values stored immediately after themselves.  The first value (offset 24)
   is the branch target; the second (offset 28 for 32-bit, 32 for 64-bit
   pointers) is left in r1 when control reaches the target.
   Clobbers r0, r1, tr0, tr1.
   NOTE(review): presumably the template GCC copies when it builds a
   nested-function trampoline, with r1 carrying the static chain -- confirm
   against the trampoline initialisation code in the compiler.  */
2978#if __SH5__ == 32
2979 .section .text..SHmedia32,"ax"
2980#else
2981 .text
2982#endif
2983 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2984 .global GLOBAL(GCC_nested_trampoline)
59312820 2985 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
87e19636 2986GLOBAL(GCC_nested_trampoline):
2987 .mode SHmedia
/* PC-relative target with a zero offset (r63), read back into r0: this
   yields the run-time address of this code, which is the base for the two
   loads below.  */
2988 ptrel/u r63, tr0
2989 gettr tr0, r0
2990#if __SH5__ == 64
2991 ld.q r0, 24, r1
2992#else
2993 ld.l r0, 24, r1
2994#endif
/* tr1 = branch target loaded from offset 24.  */
2995 ptabs/l r1, tr1
2996#if __SH5__ == 64
2997 ld.q r0, 32, r1
2998#else
2999 ld.l r0, 28, r1
3000#endif
3001 blink tr1, r63
805e22b2 3002
3003 ENDFUNC(GLOBAL(GCC_nested_trampoline))
87e19636 3004#endif /* L_nested_trampoline */
3005#endif /* __SH5__ */
3006#if __SH5__ == 32
3007#ifdef L_push_pop_shmedia_regs
3008 .section .text..SHmedia32,"ax"
3009 .mode SHmedia
3010 .align 2
/* GCC_push_shmedia_regs / GCC_push_shmedia_regs_nofpu.
   Exactly one of the two entry points is assembled, chosen by
   __SH4_NOFPU__.
     FPU build:   reserves 14 slots and stores dr36..dr62 (even registers),
                  then falls into the common part below.
     Common part: reserves 27 more slots and stores tr5..tr7 (through
                  r60..r62), r44..r59 and r28..r35.
   All slots are 8 bytes; the frame therefore grows by 41*8 bytes with FPU
   and 27*8 bytes without.  Returns to the address in r18 through tr0.
   r60..r62 are overwritten with the target-register values and are not
   themselves saved.  */
3011#ifndef __SH4_NOFPU__
3012 .global GLOBAL(GCC_push_shmedia_regs)
805e22b2 3013 FUNC(GLOBAL(GCC_push_shmedia_regs))
87e19636 3014GLOBAL(GCC_push_shmedia_regs):
3015 addi.l r15, -14*8, r15
3016 fst.d r15, 13*8, dr62
3017 fst.d r15, 12*8, dr60
3018 fst.d r15, 11*8, dr58
3019 fst.d r15, 10*8, dr56
3020 fst.d r15, 9*8, dr54
3021 fst.d r15, 8*8, dr52
3022 fst.d r15, 7*8, dr50
3023 fst.d r15, 6*8, dr48
3024 fst.d r15, 5*8, dr46
3025 fst.d r15, 4*8, dr44
3026 fst.d r15, 3*8, dr42
3027 fst.d r15, 2*8, dr40
3028 fst.d r15, 1*8, dr38
3029 fst.d r15, 0*8, dr36
59312820 3030#else /* __SH4_NOFPU__ */
87e19636 3031 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
805e22b2 3032 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
87e19636 3033GLOBAL(GCC_push_shmedia_regs_nofpu):
59312820 3034#endif /* ! __SH4_NOFPU__ */
/* Common part: general and target registers.  */
87e19636 3035 ptabs/l r18, tr0
3036 addi.l r15, -27*8, r15
/* Target registers cannot be stored directly; copy them to r60..r62.  */
3037 gettr tr7, r62
3038 gettr tr6, r61
3039 gettr tr5, r60
3040 st.q r15, 26*8, r62
3041 st.q r15, 25*8, r61
3042 st.q r15, 24*8, r60
3043 st.q r15, 23*8, r59
3044 st.q r15, 22*8, r58
3045 st.q r15, 21*8, r57
3046 st.q r15, 20*8, r56
3047 st.q r15, 19*8, r55
3048 st.q r15, 18*8, r54
3049 st.q r15, 17*8, r53
3050 st.q r15, 16*8, r52
3051 st.q r15, 15*8, r51
3052 st.q r15, 14*8, r50
3053 st.q r15, 13*8, r49
3054 st.q r15, 12*8, r48
3055 st.q r15, 11*8, r47
3056 st.q r15, 10*8, r46
3057 st.q r15, 9*8, r45
3058 st.q r15, 8*8, r44
3059 st.q r15, 7*8, r35
3060 st.q r15, 6*8, r34
3061 st.q r15, 5*8, r33
3062 st.q r15, 4*8, r32
3063 st.q r15, 3*8, r31
3064 st.q r15, 2*8, r30
3065 st.q r15, 1*8, r29
3066 st.q r15, 0*8, r28
3067 blink tr0, r63
805e22b2 3068#ifndef __SH4_NOFPU__
3069 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
59312820 3070#else
805e22b2 3071 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
59312820 3072#endif
/* GCC_pop_shmedia_regs / GCC_pop_shmedia_regs_nofpu.
   Exactly one of the two entry points is assembled, chosen by
   __SH4_NOFPU__.  Restores what the matching push routine saved.
     FPU build:   sets r0 = 41*8, reloads dr36..dr62 (even registers) from
                  slots 27..40, then branches to .L0, skipping the
                  "movi 27*8, r0" of the common part.
     Common part: reloads tr5..tr7 (through r60..r62), r44..r59 and
                  r28..r35 from slots 0..26, then releases r0 bytes of
                  stack and returns to the address in r18 through tr0.
   r0 and r60..r62 are clobbered.  */
3073#ifndef __SH4_NOFPU__
87e19636 3074 .global GLOBAL(GCC_pop_shmedia_regs)
805e22b2 3075 FUNC(GLOBAL(GCC_pop_shmedia_regs))
87e19636 3076GLOBAL(GCC_pop_shmedia_regs):
3077 pt .L0, tr1
/* Total frame size to release when FP registers were saved too.  */
3078 movi 41*8, r0
3079 fld.d r15, 40*8, dr62
3080 fld.d r15, 39*8, dr60
3081 fld.d r15, 38*8, dr58
3082 fld.d r15, 37*8, dr56
3083 fld.d r15, 36*8, dr54
3084 fld.d r15, 35*8, dr52
3085 fld.d r15, 34*8, dr50
3086 fld.d r15, 33*8, dr48
3087 fld.d r15, 32*8, dr46
3088 fld.d r15, 31*8, dr44
3089 fld.d r15, 30*8, dr42
3090 fld.d r15, 29*8, dr40
3091 fld.d r15, 28*8, dr38
3092 fld.d r15, 27*8, dr36
3093 blink tr1, r63
59312820 3094#else /* __SH4_NOFPU__ */
87e19636 3095 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
805e22b2 3096 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
87e19636 3097GLOBAL(GCC_pop_shmedia_regs_nofpu):
59312820 3098#endif /* ! __SH4_NOFPU__ */
/* Frame size to release when only general/target registers were saved.  */
87e19636 3099 movi 27*8, r0
3100.L0:
3101 ptabs r18, tr0
3102 ld.q r15, 26*8, r62
3103 ld.q r15, 25*8, r61
3104 ld.q r15, 24*8, r60
/* Re-establish tr5..tr7 from the values just loaded.  */
3105 ptabs r62, tr7
3106 ptabs r61, tr6
3107 ptabs r60, tr5
3108 ld.q r15, 23*8, r59
3109 ld.q r15, 22*8, r58
3110 ld.q r15, 21*8, r57
3111 ld.q r15, 20*8, r56
3112 ld.q r15, 19*8, r55
3113 ld.q r15, 18*8, r54
3114 ld.q r15, 17*8, r53
3115 ld.q r15, 16*8, r52
3116 ld.q r15, 15*8, r51
3117 ld.q r15, 14*8, r50
3118 ld.q r15, 13*8, r49
3119 ld.q r15, 12*8, r48
3120 ld.q r15, 11*8, r47
3121 ld.q r15, 10*8, r46
3122 ld.q r15, 9*8, r45
3123 ld.q r15, 8*8, r44
3124 ld.q r15, 7*8, r35
3125 ld.q r15, 6*8, r34
3126 ld.q r15, 5*8, r33
3127 ld.q r15, 4*8, r32
3128 ld.q r15, 3*8, r31
3129 ld.q r15, 2*8, r30
3130 ld.q r15, 1*8, r29
3131 ld.q r15, 0*8, r28
/* Release the frame: r0 is 27*8 or 41*8 depending on the entry taken.  */
3132 add.l r15, r0, r15
3133 blink tr0, r63
805e22b2 3134
3135#ifndef __SH4_NOFPU__
3136 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
59312820 3137#else
805e22b2 3138 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
59312820 3139#endif
87e19636 3140#endif /* L_push_pop_shmedia_regs */
3141#endif /* __SH5__ == 32 */
59312820 3142
59312820 3143#ifdef L_div_table
d6005df3 3144#if __SH5__
273fffd6 3145#if defined(__pic__) && __SHMEDIA__
/* sdivsi3, SHmedia PIC build: 32-bit signed division, dividend in r4,
   divisor in r5, quotient in r0 (see the register summary further down).
   The divisor is normalised, its top bits index the division table for an
   initial reciprocal estimate, and the estimate is refined with
   multiplications before being applied to the dividend; the fixed-point
   format of each intermediate is noted on the instruction that produces it.
   The table is addressed through tr0 from its middle label, using a signed
   index: byte lookup at index, word lookup at 2 * index.  */
59312820 3146 .global GLOBAL(sdivsi3)
3147 FUNC(GLOBAL(sdivsi3))
3148#if __SH5__ == 32
3149 .section .text..SHmedia32,"ax"
3150#else
3151 .text
3152#endif
3153#if 0
3154/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3155 in a text section does not work (at least for shared libraries):
3156 the linker sets the LSB of the address as if this was SHmedia code. */
3157#define TEXT_DATA_BUG
3158#endif
3159 .align 2
3160 // inputs: r4,r5
3161 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3162 // result in r0
/* NOTE(review): the .global below repeats the directive at the top of this
   block; it is redundant but harmless.  */
3163 .global GLOBAL(sdivsi3)
3164GLOBAL(sdivsi3):
/* tr0 temporarily holds the table address; it is read back into r20 with
   gettr and later reloaded with the return address.  */
3165#ifdef TEXT_DATA_BUG
3166 ptb datalabel Local_div_table,tr0
3167#else
3168 ptb GLOBAL(div_table_internal),tr0
3169#endif
3170 nsb r5, r1
3171 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3172 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3173 /* bubble */
3174 gettr tr0,r20
3175 ldx.ub r20, r21, r19 // u0.8
3176 shari r25, 32, r25 // normalize to s2.30
3177 shlli r21, 1, r21
3178 muls.l r25, r19, r19 // s2.38
3179 ldx.w r20, r21, r21 // s2.14
/* Capture the return address now: r18 is reused as a scratch register
   further down.  */
3180 ptabs r18, tr0
3181 shari r19, 24, r19 // truncate to s2.14
3182 sub r21, r19, r19 // some 11 bit inverse in s1.14
3183 muls.l r19, r19, r21 // u0.28
/* r1 = 92 - nsb(divisor): the final right-shift count used by shard.  */
3184 sub r63, r1, r1
3185 addi r1, 92, r1
3186 muls.l r25, r21, r18 // s2.58
3187 shlli r19, 45, r19 // multiply by two and convert to s2.58
3188 /* bubble */
3189 sub r19, r18, r18
3190 shari r18, 28, r18 // some 22 bit inverse in s1.30
3191 muls.l r18, r25, r0 // s2.60
3192 muls.l r18, r4, r25 // s32.30
3193 /* bubble */
3194 shari r0, 16, r19 // s-16.44
3195 muls.l r19, r18, r19 // s-16.74
/* r0 = sign mask of the product (all ones if negative, else zero); it is
   used by the xor and the final subtraction below.  */
3196 shari r25, 63, r0
3197 shari r4, 14, r18 // s19.-14
3198 shari r19, 30, r19 // s-16.44
3199 muls.l r19, r18, r19 // s15.30
3200 xor r21, r0, r21 // You could also use the constant 1 << 27.
3201 add r21, r25, r21
3202 sub r21, r19, r21
3203 shard r21, r1, r21
3204 sub r21, r0, r0
3205 blink tr0, r63
3206 ENDFUNC(GLOBAL(sdivsi3))
3207/* This table has been generated by divtab.c .
3208Defects for bias -330:
3209 Max defect: 6.081536e-07 at -1.000000e+00
3210 Min defect: 2.849516e-08 at 1.030651e+00
3211 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3212 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3213 Defect at 1: 1.238659e-07
3214 Defect at -2: 1.061708e-07 */
3215#else /* ! __pic__ || ! __SHMEDIA__ */
3216 .section .rodata
3217#endif /* __pic__ */
/* Local_div_table: private copy of the division table, assembled only when
   TEXT_DATA_BUG is defined in a PIC SHmedia build.
   The label sits in the middle of a 128-byte object:
     label-64 .. label-33   16 words, negative division constants
     label-32 .. label-17   16 bytes, negative division factors
     label-16 .. label+15   32 bytes of padding (never indexed)
     label+16 .. label+31   16 bytes, positive division factors
     label+32 .. label+63   16 words, positive division constants
   so that a signed index i selects the factor at label+i and the constant
   at label+2*i.  The section is switched back to .rodata at the end.  */
273fffd6 3218#if defined(TEXT_DATA_BUG) && defined(__pic__) && __SHMEDIA__
59312820 3219 .balign 2
3220 .type Local_div_table,@object
3221 .size Local_div_table,128
3222/* negative division constants */
3223 .word -16638
3224 .word -17135
3225 .word -17737
3226 .word -18433
3227 .word -19103
3228 .word -19751
3229 .word -20583
3230 .word -21383
3231 .word -22343
3232 .word -23353
3233 .word -24407
3234 .word -25582
3235 .word -26863
3236 .word -28382
3237 .word -29965
3238 .word -31800
3239/* negative division factors */
3240 .byte 66
3241 .byte 70
3242 .byte 75
3243 .byte 81
3244 .byte 87
3245 .byte 93
3246 .byte 101
3247 .byte 109
3248 .byte 119
3249 .byte 130
3250 .byte 142
3251 .byte 156
3252 .byte 172
3253 .byte 192
3254 .byte 214
3255 .byte 241
/* Padding for the unused index range on either side of the label.  */
3256 .skip 16
3257Local_div_table:
3258 .skip 16
3259/* positive division factors */
3260 .byte 241
3261 .byte 214
3262 .byte 192
3263 .byte 172
3264 .byte 156
3265 .byte 142
3266 .byte 130
3267 .byte 119
3268 .byte 109
3269 .byte 101
3270 .byte 93
3271 .byte 87
3272 .byte 81
3273 .byte 75
3274 .byte 70
3275 .byte 66
3276/* positive division constants */
3277 .word 31801
3278 .word 29966
3279 .word 28383
3280 .word 26864
3281 .word 25583
3282 .word 24408
3283 .word 23354
3284 .word 22344
3285 .word 21384
3286 .word 20584
3287 .word 19752
3288 .word 19104
3289 .word 18434
3290 .word 17738
3291 .word 17136
3292 .word 16639
3293 .section .rodata
3294#endif /* TEXT_DATA_BUG */
/* div_table: the exported division table, also reachable through the hidden
   alias div_table_internal.
   The label sits in the middle of a 128-byte object:
     label-64 .. label-33   16 words, negative division constants
     label-32 .. label-17   16 bytes, negative division factors
     label-16 .. label+15   32 bytes of padding (never indexed)
     label+16 .. label+31   16 bytes, positive division factors
     label+32 .. label+63   16 words, positive division constants
   so that a signed index i selects the factor at label+i and the constant
   at label+2*i.  */
3295 .balign 2
3296 .type GLOBAL(div_table),@object
3297 .size GLOBAL(div_table),128
3298/* negative division constants */
3299 .word -16638
3300 .word -17135
3301 .word -17737
3302 .word -18433
3303 .word -19103
3304 .word -19751
3305 .word -20583
3306 .word -21383
3307 .word -22343
3308 .word -23353
3309 .word -24407
3310 .word -25582
3311 .word -26863
3312 .word -28382
3313 .word -29965
3314 .word -31800
3315/* negative division factors */
3316 .byte 66
3317 .byte 70
3318 .byte 75
3319 .byte 81
3320 .byte 87
3321 .byte 93
3322 .byte 101
3323 .byte 109
3324 .byte 119
3325 .byte 130
3326 .byte 142
3327 .byte 156
3328 .byte 172
3329 .byte 192
3330 .byte 214
3331 .byte 241
/* Padding for the unused index range on either side of the label.  */
3332 .skip 16
3333 .global GLOBAL(div_table)
3334GLOBAL(div_table):
3335 HIDDEN_ALIAS(div_table_internal,div_table)
3336 .skip 16
3337/* positive division factors */
3338 .byte 241
3339 .byte 214
3340 .byte 192
3341 .byte 172
3342 .byte 156
3343 .byte 142
3344 .byte 130
3345 .byte 119
3346 .byte 109
3347 .byte 101
3348 .byte 93
3349 .byte 87
3350 .byte 81
3351 .byte 75
3352 .byte 70
3353 .byte 66
3354/* positive division constants */
3355 .word 31801
3356 .word 29966
3357 .word 28383
3358 .word 26864
3359 .word 25583
3360 .word 24408
3361 .word 23354
3362 .word 22344
3363 .word 21384
3364 .word 20584
3365 .word 19752
3366 .word 19104
3367 .word 18434
3368 .word 17738
3369 .word 17136
3370 .word 16639
d6005df3 3371
9fe603c3 3372#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3373/* This code uses shld, thus is not suitable for SH1 / SH2. */
d6005df3 3374
3375/* Signed / unsigned division without use of FPU, optimized for SH4.
3376 Uses a lookup table for divisors in the range -128 .. +128, and
3377 div1 with case distinction for larger divisors in three more ranges.
3378 The code is lumped together with the table to allow the use of mova. */
/* Byte offsets of individual bytes inside a 32-bit word stored in memory.
   The values depend on the byte order of the target:
     L_LSB     least significant byte of the word (bits 0..7)
     L_LSWMSB  upper byte of the low 16-bit half (bits 8..15)
     L_MSWLSB  lower byte of the high 16-bit half (bits 16..23)
   The division routines in this section use L_LSWMSB and L_MSWLSB as
   displacements for mov.b stores relative to r15.  */
3379#ifdef __LITTLE_ENDIAN__
3380#define L_LSB 0
3381#define L_LSWMSB 1
3382#define L_MSWLSB 2
3383#else
3384#define L_LSB 3
3385#define L_LSWMSB 2
3386#define L_MSWLSB 1
3387#endif
3388
/* udivsi3_i4i: unsigned 32-bit division without the FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on the stack and reloaded before every return; r1 is
   scratch, and the table-driven path also writes MACH/MACL (dmulu.l).
   The divisor selects one of these paths:
     divisor <= 128         multiply by a tabulated inverse (udiv_le128)
     128 < divisor < 65536  div1 steps, continued at udiv_25
     divisor >= 65536       udiv_ge64k; if additionally
                            divisor > (dividend >> 8) only eight div1 steps
                            are run (udiv_r8), otherwise the work is
                            finished at div_ge64k_end
   udiv_25 and div_ge64k_end are located inside sdivsi3_i4i.  The labels
   div_le128, div_by_1_neg, div_ge64k and div_r8 in this function are entry
   points for sdivsi3_i4i, which has already pushed r4 and r5 when it
   branches here.  */
3389 .balign 4
3390 .global GLOBAL(udivsi3_i4i)
3391 FUNC(GLOBAL(udivsi3_i4i))
3392GLOBAL(udivsi3_i4i):
	/* r1 = 128, the upper bound of the table-driven range.  */
3393 mov.w LOCAL(c128_w), r1
3394 div0u
	/* r0 = dividend >> 8.  */
3395 mov r4,r0
3396 shlr8 r0
	/* T = (divisor > 128), unsigned.  */
3397 cmp/hi r1,r5
	/* r1 = low 16 bits of the divisor, compared against r5 below.  */
3398 extu.w r5,r1
3399 bf LOCAL(udiv_le128)
	/* T = (divisor fits in 16 bits).  */
3400 cmp/eq r5,r1
3401 bf LOCAL(udiv_ge64k)
	/* 128 < divisor < 65536.  shlr leaves r0 = dividend >> 9 and moves
	   bit 8 of the dividend into T, where the first div1 picks it up.  */
3402 shlr r0
	/* Keep the unshifted divisor in r1 so that it is the value saved.  */
3403 mov r5,r1
3404 shll16 r5
3405 mov.l r4,@-r15
3406 div1 r5,r0
3407 mov.l r1,@-r15
3408 div1 r5,r0
3409 div1 r5,r0
	/* Continue in the shared tail; the div1 below is the delay slot.  */
3410 bra LOCAL(udiv_25)
3411 div1 r5,r0
3412
/* Entry from sdivsi3_i4i for a divisor magnitude <= 128; r4 and r5 are
   already on the stack.  */
3413LOCAL(div_le128):
3414 mova LOCAL(div_table_ix),r0
3415 bra LOCAL(div_le128_2)
	/* Delay slot: r1 = div_table_ix[divisor].  */
3416 mov.b @(r0,r5),r1
/* Unsigned entry for divisor <= 128: save r4 and r5 around the lookup.  */
3417LOCAL(udiv_le128):
3418 mov.l r4,@-r15
3419 mova LOCAL(div_table_ix),r0
	/* r1 = div_table_ix[divisor].  */
3420 mov.b @(r0,r5),r1
3421 mov.l r5,@-r15
3422LOCAL(div_le128_2):
	/* r1 = inverse stored at byte offset r1 from div_table_inv.  */
3423 mova LOCAL(div_table_inv),r0
3424 mov.l @(r0,r1),r1
	/* T = ((divisor & 0xfe) == 0); consumed by the bt/s below.  */
3425 mov r5,r0
3426 tst #0xfe,r0
3427 mova LOCAL(div_table_clz),r0
	/* MACH:MACL = inverse * dividend.  */
3428 dmulu.l r1,r4
	/* r1 = div_table_clz[divisor], the shift count for the final shld.  */
3429 mov.b @(r0,r5),r1
3430 bt/s LOCAL(div_by_1)
	/* Delay slot: r0 = dividend, which is the result on the div_by_1 path.  */
3431 mov r4,r0
3432 mov.l @r15+,r5
3433 sts mach,r0
	/* No clrt is needed: T is clear because the bt/s above fell through.  */
3434 /* clrt */
	/* Add the dividend to the high product word; the carry goes to T ...  */
3435 addc r4,r0
3436 mov.l @r15+,r4
	/* ... and rotcr brings that carry back in as bit 31.  */
3437 rotcr r0
3438 rts
	/* Delay slot: apply the tabulated shift count.  */
3439 shld r1,r0
3440
/* Entry from sdivsi3_i4i: the result is the negated dividend.  */
3441LOCAL(div_by_1_neg):
3442 neg r4,r0
/* Reload r5 and r4 and return with the result already in r0.  */
3443LOCAL(div_by_1):
3444 mov.l @r15+,r5
3445 rts
3446 mov.l @r15+,r4
3447
/* Entry from sdivsi3_i4i for a divisor magnitude >= 65536.  On entry
   T = (divisor > dividend >> 8), set in the delay slot of the branch that
   leads here.  */
3448LOCAL(div_ge64k):
3449 bt/s LOCAL(div_r8)
3450 div0u
3451 shll8 r5
3452 bra LOCAL(div_ge64k_2)
3453 div1 r5,r0
/* Unsigned entry for divisor >= 65536.  */
3454LOCAL(udiv_ge64k):
	/* T = (divisor > dividend >> 8): that case is handled by udiv_r8.  */
3455 cmp/hi r0,r5
	/* Keep the unshifted divisor in r1 so that it is the value saved.  */
3456 mov r5,r1
3457 bt LOCAL(udiv_r8)
3458 shll8 r5
3459 mov.l r4,@-r15
3460 div1 r5,r0
3461 mov.l r1,@-r15
3462LOCAL(div_ge64k_2):
3463 div1 r5,r0
	/* r1 = 0; it is pushed below as a scratch word on the stack.  */
3464 mov.l LOCAL(zero_l),r1
3465 .rept 4
3466 div1 r5,r0
3467 .endr
3468 mov.l r1,@-r15
3469 div1 r5,r0
	/* r1 = 0xff00 sign-extended by mov.w: a mask with the low byte clear.  */
3470 mov.w LOCAL(m256_w),r1
3471 div1 r5,r0
	/* Store the low byte of r0 into the scratch word.  */
3472 mov.b r0,@(L_LSWMSB,r15)
	/* xor / and / xor: r0 keeps the bits selected by the mask in r1 and
	   takes its low byte from r4.  */
3473 xor r4,r0
3474 and r1,r0
3475 bra LOCAL(div_ge64k_end)
3476 xor r4,r0
3477
/* Entry from sdivsi3_i4i for the eight-step case.  r4 and r5 are already
   saved, and T was cleared by the div0u in the delay slot of the branch
   to this label.  */
3478LOCAL(div_r8):
3479 shll16 r4
3480 bra LOCAL(div_r8_2)
3481 shll8 r4
/* Unsigned entry for the eight-step case.  */
3482LOCAL(udiv_r8):
3483 mov.l r4,@-r15
3484 shll16 r4
3485 clrt
3486 shll8 r4
3487 mov.l r5,@-r15
/* Here r4 = dividend << 24 and r0 = dividend >> 8.  The div1 steps run on
   r1, while each rotcl on r4/r0 shifts the previous T in at the bottom and
   moves the next bit of the low dividend byte from the top into T.  */
3488LOCAL(div_r8_2):
3489 rotcl r4
3490 mov r0,r1
3491 div1 r5,r1
3492 mov r4,r0
3493 rotcl r0
	/* Copy the divisor to r4 so that r5 can be reloaded before the last step.  */
3494 mov r5,r4
3495 div1 r5,r1
3496 .rept 5
3497 rotcl r0; div1 r5,r1
3498 .endr
3499 rotcl r0
3500 mov.l @r15+,r5
3501 div1 r4,r1
3502 mov.l @r15+,r4
3503 rts
	/* Delay slot: shift the T produced by the last div1 into r0.  */
3504 rotcl r0
3505
3506 ENDFUNC(GLOBAL(udivsi3_i4i))
3507
/* sdivsi3_i4i: signed 32-bit division without the FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   The original r4 and r5 are pushed on entry and reloaded before every
   return.  Negative operands are negated first and the division itself is
   carried out on the resulting values; the paths below pos_result return the
   quotient as computed, the paths below neg_result negate it at the end.
   For divisor magnitudes <= 128 the inverse tables are used; larger
   divisors use div1 steps.  Some paths continue in code that is shared with
   udivsi3_i4i (div_le128, div_ge64k, div_r8, div_by_1_neg).  */
3508 .global GLOBAL(sdivsi3_i4i)
3509 FUNC(GLOBAL(sdivsi3_i4i))
3510 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3511 but we effectively clobber only r1. */
3512GLOBAL(sdivsi3_i4i):
3513 mov.l r4,@-r15
	/* T = (divisor >= 0).  */
3514 cmp/pz r5
	/* r1 = 128, the upper bound of the table-driven range.  */
3515 mov.w LOCAL(c128_w), r1
3516 bt/s LOCAL(pos_divisor)
	/* Delay slot: T = (dividend >= 0).  */
3517 cmp/pz r4
	/* Negative divisor: save the original, then continue with its negation.  */
3518 mov.l r5,@-r15
3519 neg r5,r5
	/* Non-negative dividend and negative divisor: the quotient is negated.  */
3520 bt/s LOCAL(neg_result)
	/* Delay slot: T = (divisor > 128), consumed at pos_result / neg_result.  */
3521 cmp/hi r1,r5
	/* Both operands were negative: negate the dividend as well.  */
3522 neg r4,r4
/* The quotient is returned as computed.  Expects T = (divisor > 128).  */
3523LOCAL(pos_result):
3524 extu.w r5,r0
3525 bf LOCAL(div_le128)
	/* T = (divisor fits in 16 bits).  */
3526 cmp/eq r5,r0
	/* r0 = dividend >> 8.  */
3527 mov r4,r0
3528 shlr8 r0
3529 bf/s LOCAL(div_ge64k)
	/* Delay slot: T = (divisor > dividend >> 8).  */
3530 cmp/hi r0,r5
3531 div0u
3532 shll16 r5
3533 div1 r5,r0
3534 div1 r5,r0
3535 div1 r5,r0
/* Shared continuation; udivsi3_i4i branches here as well.  */
3536LOCAL(udiv_25):
	/* r1 = 0; it is pushed below as a scratch word on the stack.  */
3537 mov.l LOCAL(zero_l),r1
3538 div1 r5,r0
3539 div1 r5,r0
3540 mov.l r1,@-r15
3541 .rept 3
3542 div1 r5,r0
3543 .endr
	/* Store the low byte of r0 into the scratch word.  */
3544 mov.b r0,@(L_MSWLSB,r15)
	/* Keep the upper half of r0 and place the low 16 bits of r4 below it.  */
3545 xtrct r4,r0
3546 swap.w r0,r0
3547 .rept 8
3548 div1 r5,r0
3549 .endr
	/* Store the low byte of r0 into the next lower byte of the scratch word.  */
3550 mov.b r0,@(L_LSWMSB,r15)
/* Final eight steps; the >= 64k paths of both functions join here.  */
3551LOCAL(div_ge64k_end):
3552 .rept 8
3553 div1 r5,r0
3554 .endr
	/* r4 = scratch word holding the bytes stored earlier.  */
3555 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3556 extu.b r0,r0
3557 mov.l @r15+,r5
	/* Merge the stored bytes with the low byte of r0.  */
3558 or r4,r0
3559 mov.l @r15+,r4
3560 rts
	/* Delay slot: shift the T left by the last div1 into r0.  */
3561 rotcl r0
3562
/* Divisor magnitude <= 128 with a negated result.  On entry r0 holds the
   low 16 bits of the divisor (set at neg_result).  */
3563LOCAL(div_le128_neg):
	/* T = ((divisor & 0xfe) == 0); consumed by the bt/s below.  */
3564 tst #0xfe,r0
3565 mova LOCAL(div_table_ix),r0
	/* r1 = div_table_ix[divisor].  */
3566 mov.b @(r0,r5),r1
3567 mova LOCAL(div_table_inv),r0
3568 bt/s LOCAL(div_by_1_neg)
	/* Delay slot: r1 = inverse stored at byte offset r1 from div_table_inv.  */
3569 mov.l @(r0,r1),r1
3570 mova LOCAL(div_table_clz),r0
	/* MACH:MACL = inverse * dividend.  */
3571 dmulu.l r1,r4
	/* r1 = div_table_clz[divisor], the shift count for shld.  */
3572 mov.b @(r0,r5),r1
3573 mov.l @r15+,r5
3574 sts mach,r0
	/* No clrt is needed: T is clear because the bt/s above fell through.  */
3575 /* clrt */
	/* Add the dividend to the high product word; the carry goes to T ...  */
3576 addc r4,r0
3577 mov.l @r15+,r4
	/* ... and rotcr brings that carry back in as bit 31.  */
3578 rotcr r0
3579 shld r1,r0
3580 rts
	/* Delay slot: negate the quotient.  */
3581 neg r0,r0
3582
/* The divisor is non-negative.  On entry T = (dividend >= 0).  */
3583LOCAL(pos_divisor):
3584 mov.l r5,@-r15
3585 bt/s LOCAL(pos_result)
	/* Delay slot: T = (divisor > 128), consumed at pos_result / neg_result.  */
3586 cmp/hi r1,r5
	/* Negative dividend: continue with its negation and negate the quotient.  */
3587 neg r4,r4
/* The quotient is built in r1 and negated into r0 before returning.
   Expects T = (divisor > 128).  */
3588LOCAL(neg_result):
3589 extu.w r5,r0
3590 bf LOCAL(div_le128_neg)
	/* T = (divisor fits in 16 bits).  */
3591 cmp/eq r5,r0
	/* r0 = dividend >> 8.  */
3592 mov r4,r0
3593 shlr8 r0
3594 bf/s LOCAL(div_ge64k_neg)
	/* Delay slot: T = (divisor > dividend >> 8).  */
3595 cmp/hi r0,r5
3596 div0u
	/* r1 = 0; it is pushed below as a scratch word on the stack.  */
3597 mov.l LOCAL(zero_l),r1
3598 shll16 r5
3599 div1 r5,r0
3600 mov.l r1,@-r15
3601 .rept 7
3602 div1 r5,r0
3603 .endr
	/* Store the low byte of r0 into the scratch word.  */
3604 mov.b r0,@(L_MSWLSB,r15)
	/* Keep the upper half of r0 and place the low 16 bits of r4 below it.  */
3605 xtrct r4,r0
3606 swap.w r0,r0
3607 .rept 8
3608 div1 r5,r0
3609 .endr
	/* Store the low byte of r0 into the next lower byte of the scratch word.  */
3610 mov.b r0,@(L_LSWMSB,r15)
3611LOCAL(div_ge64k_neg_end):
3612 .rept 8
3613 div1 r5,r0
3614 .endr
	/* r4 = scratch word holding the bytes stored earlier.  */
3615 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3616 extu.b r0,r1
3617 mov.l @r15+,r5
	/* Merge the stored bytes with the low byte of r0, into r1.  */
3618 or r4,r1
/* Common exit of the negated paths: reload r4, shift the last T into r1 and
   return the negation of r1 in r0.  */
3619LOCAL(div_r8_neg_end):
3620 mov.l @r15+,r4
3621 rotcl r1
3622 rts
3623 neg r1,r0
3624
/* Divisor magnitude >= 65536 with a negated result.  On entry
   T = (divisor > dividend >> 8).  */
3625LOCAL(div_ge64k_neg):
3626 bt/s LOCAL(div_r8_neg)
3627 div0u
3628 shll8 r5
	/* r1 = 0; it is pushed below as a scratch word on the stack.  */
3629 mov.l LOCAL(zero_l),r1
3630 .rept 6
3631 div1 r5,r0
3632 .endr
3633 mov.l r1,@-r15
3634 div1 r5,r0
	/* r1 = 0xff00 sign-extended by mov.w: a mask with the low byte clear.  */
3635 mov.w LOCAL(m256_w),r1
3636 div1 r5,r0
	/* Store the low byte of r0 into the scratch word.  */
3637 mov.b r0,@(L_LSWMSB,r15)
	/* xor / and / xor: r0 keeps the bits selected by the mask in r1 and
	   takes its low byte from r4.  */
3638 xor r4,r0
3639 and r1,r0
3640 bra LOCAL(div_ge64k_neg_end)
3641 xor r4,r0
3642
/* The constant 128, loaded with mov.w at both function entry points.
   NOTE(review): presumably placed between the code paths to stay within
   reach of the mov.w loads -- confirm.  */
3643LOCAL(c128_w):
3644 .word 128
3645
/* Eight-step case with a negated result.  The div1 steps run on r0
   (= dividend >> 8) while r1, which starts as dividend << 24, is rotated
   through T.  */
3646LOCAL(div_r8_neg):
3647 clrt
3648 shll16 r4
3649 mov r4,r1
3650 shll8 r1
	/* Copy the divisor to r4 so that r5 can be reloaded before the last step.  */
3651 mov r5,r4
3652 .rept 7
3653 rotcl r1; div1 r5,r0
3654 .endr
3655 mov.l @r15+,r5
3656 rotcl r1
3657 bra LOCAL(div_r8_neg_end)
3658 div1 r4,r0
3659
/* The 16-bit pattern 0xff00; the code loads it with mov.w and uses the
   result as an AND mask.  */
3660LOCAL(m256_w):
3661 .word 0xff00
/* div_table_clz: one signed byte per divisor 0 .. 127.  The division code
   loads the entry for the divisor with mov.b and applies it to the
   intermediate result with shld.  The entry for divisor 128 is not stored
   here: it is the first byte of div_table_ix, which follows immediately.  */
3662/* This table has been generated by divtab-sh4.c. */
3663 .balign 4
3664LOCAL(div_table_clz):
3665 .byte 0
3666 .byte 1
3667 .byte 0
3668 .byte -1
3669 .byte -1
3670 .byte -2
3671 .byte -2
3672 .byte -2
3673 .byte -2
3674 .byte -3
3675 .byte -3
3676 .byte -3
3677 .byte -3
3678 .byte -3
3679 .byte -3
3680 .byte -3
3681 .byte -3
3682 .byte -4
3683 .byte -4
3684 .byte -4
3685 .byte -4
3686 .byte -4
3687 .byte -4
3688 .byte -4
3689 .byte -4
3690 .byte -4
3691 .byte -4
3692 .byte -4
3693 .byte -4
3694 .byte -4
3695 .byte -4
3696 .byte -4
3697 .byte -4
3698 .byte -5
3699 .byte -5
3700 .byte -5
3701 .byte -5
3702 .byte -5
3703 .byte -5
3704 .byte -5
3705 .byte -5
3706 .byte -5
3707 .byte -5
3708 .byte -5
3709 .byte -5
3710 .byte -5
3711 .byte -5
3712 .byte -5
3713 .byte -5
3714 .byte -5
3715 .byte -5
3716 .byte -5
3717 .byte -5
3718 .byte -5
3719 .byte -5
3720 .byte -5
3721 .byte -5
3722 .byte -5
3723 .byte -5
3724 .byte -5
3725 .byte -5
3726 .byte -5
3727 .byte -5
3728 .byte -5
3729 .byte -5
3730 .byte -6
3731 .byte -6
3732 .byte -6
3733 .byte -6
3734 .byte -6
3735 .byte -6
3736 .byte -6
3737 .byte -6
3738 .byte -6
3739 .byte -6
3740 .byte -6
3741 .byte -6
3742 .byte -6
3743 .byte -6
3744 .byte -6
3745 .byte -6
3746 .byte -6
3747 .byte -6
3748 .byte -6
3749 .byte -6
3750 .byte -6
3751 .byte -6
3752 .byte -6
3753 .byte -6
3754 .byte -6
3755 .byte -6
3756 .byte -6
3757 .byte -6
3758 .byte -6
3759 .byte -6
3760 .byte -6
3761 .byte -6
3762 .byte -6
3763 .byte -6
3764 .byte -6
3765 .byte -6
3766 .byte -6
3767 .byte -6
3768 .byte -6
3769 .byte -6
3770 .byte -6
3771 .byte -6
3772 .byte -6
3773 .byte -6
3774 .byte -6
3775 .byte -6
3776 .byte -6
3777 .byte -6
3778 .byte -6
3779 .byte -6
3780 .byte -6
3781 .byte -6
3782 .byte -6
3783 .byte -6
3784 .byte -6
3785 .byte -6
3786 .byte -6
3787 .byte -6
3788 .byte -6
3789 .byte -6
3790 .byte -6
3791 .byte -6
3792 .byte -6
/* div_table_ix: one signed byte per divisor 0 .. 128.  The division code
   loads the entry for the divisor with mov.b and uses it as a byte offset
   from div_table_inv in a mov.l access.  Apart from entry 0, every value is
   a multiple of 4 in the range -128 .. 124.  Divisors that differ only by a
   power-of-two factor carry the same offset (for example 3, 6, 12, 24, 48
   and 96 all map to offset 0).  */
3793/* Lookup table translating positive divisor to index into table of
3794 normalized inverse. N.B. the '0' entry is also the last entry of the
3795 previous table, and causes an unaligned access for division by zero. */
3796LOCAL(div_table_ix):
3797 .byte -6
3798 .byte -128
3799 .byte -128
3800 .byte 0
3801 .byte -128
3802 .byte -64
3803 .byte 0
3804 .byte 64
3805 .byte -128
3806 .byte -96
3807 .byte -64
3808 .byte -32
3809 .byte 0
3810 .byte 32
3811 .byte 64
3812 .byte 96
3813 .byte -128
3814 .byte -112
3815 .byte -96
3816 .byte -80
3817 .byte -64
3818 .byte -48
3819 .byte -32
3820 .byte -16
3821 .byte 0
3822 .byte 16
3823 .byte 32
3824 .byte 48
3825 .byte 64
3826 .byte 80
3827 .byte 96
3828 .byte 112
3829 .byte -128
3830 .byte -120
3831 .byte -112
3832 .byte -104
3833 .byte -96
3834 .byte -88
3835 .byte -80
3836 .byte -72
3837 .byte -64
3838 .byte -56
3839 .byte -48
3840 .byte -40
3841 .byte -32
3842 .byte -24
3843 .byte -16
3844 .byte -8
3845 .byte 0
3846 .byte 8
3847 .byte 16
3848 .byte 24
3849 .byte 32
3850 .byte 40
3851 .byte 48
3852 .byte 56
3853 .byte 64
3854 .byte 72
3855 .byte 80
3856 .byte 88
3857 .byte 96
3858 .byte 104
3859 .byte 112
3860 .byte 120
3861 .byte -128
3862 .byte -124
3863 .byte -120
3864 .byte -116
3865 .byte -112
3866 .byte -108
3867 .byte -104
3868 .byte -100
3869 .byte -96
3870 .byte -92
3871 .byte -88
3872 .byte -84
3873 .byte -80
3874 .byte -76
3875 .byte -72
3876 .byte -68
3877 .byte -64
3878 .byte -60
3879 .byte -56
3880 .byte -52
3881 .byte -48
3882 .byte -44
3883 .byte -40
3884 .byte -36
3885 .byte -32
3886 .byte -28
3887 .byte -24
3888 .byte -20
3889 .byte -16
3890 .byte -12
3891 .byte -8
3892 .byte -4
3893 .byte 0
3894 .byte 4
3895 .byte 8
3896 .byte 12
3897 .byte 16
3898 .byte 20
3899 .byte 24
3900 .byte 28
3901 .byte 32
3902 .byte 36
3903 .byte 40
3904 .byte 44
3905 .byte 48
3906 .byte 52
3907 .byte 56
3908 .byte 60
3909 .byte 64
3910 .byte 68
3911 .byte 72
3912 .byte 76
3913 .byte 80
3914 .byte 84
3915 .byte 88
3916 .byte 92
3917 .byte 96
3918 .byte 100
3919 .byte 104
3920 .byte 108
3921 .byte 112
3922 .byte 116
3923 .byte 120
3924 .byte 124
3925 .byte -128
/* Table of 64 inverses, one 32-bit entry each.  The label div_table_inv sits
   in the middle: 32 entries (128 bytes) precede it and 32 follow it, so the
   signed byte offsets -128 .. 124 held in div_table_ix reach every entry.
   The first entry is labelled zero_l and is 0; the div1-based paths of the
   division code also load it when they need a zero word.  */
3926/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3927 .balign 4
3928LOCAL(zero_l):
3929 .long 0x0
3930 .long 0xF81F81F9
3931 .long 0xF07C1F08
3932 .long 0xE9131AC0
3933 .long 0xE1E1E1E2
3934 .long 0xDAE6076C
3935 .long 0xD41D41D5
3936 .long 0xCD856891
3937 .long 0xC71C71C8
3938 .long 0xC0E07039
3939 .long 0xBACF914D
3940 .long 0xB4E81B4F
3941 .long 0xAF286BCB
3942 .long 0xA98EF607
3943 .long 0xA41A41A5
3944 .long 0x9EC8E952
3945 .long 0x9999999A
3946 .long 0x948B0FCE
3947 .long 0x8F9C18FA
3948 .long 0x8ACB90F7
3949 .long 0x86186187
3950 .long 0x81818182
3951 .long 0x7D05F418
3952 .long 0x78A4C818
3953 .long 0x745D1746
3954 .long 0x702E05C1
3955 .long 0x6C16C16D
3956 .long 0x68168169
3957 .long 0x642C8591
3958 .long 0x60581606
3959 .long 0x5C9882BA
3960 .long 0x58ED2309
/* Offset 0 for the byte offsets taken from div_table_ix.  */
3961LOCAL(div_table_inv):
3962 .long 0x55555556
3963 .long 0x51D07EAF
3964 .long 0x4E5E0A73
3965 .long 0x4AFD6A06
3966 .long 0x47AE147B
3967 .long 0x446F8657
3968 .long 0x41414142
3969 .long 0x3E22CBCF
3970 .long 0x3B13B13C
3971 .long 0x38138139
3972 .long 0x3521CFB3
3973 .long 0x323E34A3
3974 .long 0x2F684BDB
3975 .long 0x2C9FB4D9
3976 .long 0x29E4129F
3977 .long 0x27350B89
3978 .long 0x24924925
3979 .long 0x21FB7813
3980 .long 0x1F7047DD
3981 .long 0x1CF06ADB
3982 .long 0x1A7B9612
3983 .long 0x18118119
3984 .long 0x15B1E5F8
3985 .long 0x135C8114
3986 .long 0x11111112
3987 .long 0xECF56BF
3988 .long 0xC9714FC
3989 .long 0xA6810A7
3990 .long 0x8421085
3991 .long 0x624DD30
3992 .long 0x4104105
3993 .long 0x2040811
3994 /* maximum error: 0.987342 scaled: 0.921875*/
3995
/* The function extent closed here includes the constants and tables above.  */
3996 ENDFUNC(GLOBAL(sdivsi3_i4i))
3997#endif /* SH3 / SH4 */
3998
59312820 3999#endif /* L_div_table */
2b2f5cfb 4000
4001#ifdef L_udiv_qrnnd_16
4002#if !__SHMEDIA__
/* udiv_qrnnd_16: division helper built from sixteen div1 steps followed by a
   multiply-and-correct sequence.
   Register roles as given by the comments below:
     in:   r0 = n1, r4 = n0, r5 = d, r6 = d1
     out:  r0 = rn, r1 = qn
     r2 is scratch (__m); MACL is written by mulu.w.
   NOTE(review): the exact relation between d and d1 and the meaning of
   rn / qn are defined by the callers, which are not visible here;
   presumably rn is a remainder and qn a quotient digit -- confirm.  */
4003 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
4004 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
4005 /* n1 < d, but n1 might be larger than d1. */
4006 .global GLOBAL(udiv_qrnnd_16)
4007 .balign 8
4008GLOBAL(udiv_qrnnd_16):
4009 div0u
	/* T = (r0 > r6), unsigned, i.e. n1 > d1: handled separately at .Lots.  */
4010 cmp/hi r6,r0
4011 bt .Lots
4012 .rept 16
4013 div1 r6,r0
4014 .endr
	/* r1 = low 16 bits of r0 after the sixteen division steps.  */
4015 extu.w r0,r1
	/* T still comes from the last div1: add r6 back only when it is clear.  */
4016 bt 0f
4017 add r6,r0
	/* Shift that T into the bottom of r1.  */
40180: rotcl r1
	/* MACL = (low 16 bits of r1) * (low 16 bits of r5).  */
4019 mulu.w r1,r5
	/* Keep the upper half of r0 and place the low 16 bits of r4 below it.  */
4020 xtrct r4,r0
4021 swap.w r0,r0
4022 sts macl,r2
	/* T = (r0 >= r2), unsigned.  When set, the subtraction below does not
	   borrow and the routine returns through the second 0: label.  */
4023 cmp/hs r2,r0
4024 sub r2,r0
4025 bt 0f
	/* T is clear here, so addc adds r5 and leaves the carry-out in T.  */
4026 addc r5,r0
4027 add #-1,r1
	/* A carry-out from the addc means one correction was enough.  */
4028 bt 0f
	/* Second correction; the add to r0 is in the rts delay slot.  This label
	   is also the target named in the SL1 branch in .Lots.  */
40291: add #-1,r1
4030 rts
4031 add r5,r0
4032 .balign 8
/* n1 > d1.  */
4033.Lots:
4034 sub r5,r0
	/* r1 = (low 16 bits of r0) in the upper half, (low 16 bits of r4) in the
	   lower half.  */
4035 swap.w r4,r1
4036 xtrct r0,r1
4037 clrt
4038 mov r1,r0
	/* r0 += r5, with the carry-out left in T.  */
4039 addc r5,r0
4040 mov #-1,r1
	/* NOTE(review): SL1 is a macro defined outside this chunk; presumably it
	   emits a bf to 1b together with the shlr16 r1 -- confirm.  */
4041 SL1(bf, 1b,
4042 shlr16 r1)
40430: rts
4044 nop
4045 ENDFUNC(GLOBAL(udiv_qrnnd_16))
4046#endif /* !__SHMEDIA__ */
4047#endif /* L_udiv_qrnnd_16 */