]>
Commit | Line | Data |
---|---|---|
f1717362 | 1 | /* Copyright (C) 1994-2016 Free Software Foundation, Inc. |
a85a2db0 | 2 | |
3 | This file is free software; you can redistribute it and/or modify it | |
4 | under the terms of the GNU General Public License as published by the | |
6bc9506f | 5 | Free Software Foundation; either version 3, or (at your option) any |
a85a2db0 | 6 | later version. |
7 | ||
a85a2db0 | 8 | This file is distributed in the hope that it will be useful, but |
9 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | General Public License for more details. | |
12 | ||
6bc9506f | 13 | Under Section 7 of GPL version 3, you are granted additional |
14 | permissions described in the GCC Runtime Library Exception, version | |
15 | 3.1, as published by the Free Software Foundation. | |
16 | ||
17 | You should have received a copy of the GNU General Public License and | |
18 | a copy of the GCC Runtime Library Exception along with this program; | |
19 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
a85a2db0 | 22 | |
7b23765d | 23 | !! libgcc routines for the Renesas / SuperH SH CPUs. |
a85a2db0 | 24 | !! Contributed by Steve Chamberlain. |
25 | !! sac@cygnus.com | |
26 | ||
30b0bb86 | 27 | !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines |
28 | !! recoded in assembly by Toshiyasu Morita | |
29 | !! tm@netcom.com | |
a85a2db0 | 30 | |
cea9fc42 | 31 | #if defined(__ELF__) && defined(__linux__) |
32 | .section .note.GNU-stack,"",%progbits | |
33 | .previous | |
34 | #endif | |
35 | ||
d73f1571 | 36 | /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and |
37 | ELF local label prefixes by J"orn Rennecke | |
38 | amylaar@cygnus.com */ | |
39 | ||
2fa59365 | 40 | #include "lib1funcs.h" |
7105fb72 | 41 | |
a9cfe83b | 42 | /* t-vxworks needs to build both PIC and non-PIC versions of libgcc, |
43 | so it is more convenient to define NO_FPSCR_VALUES here than to | |
44 | define it on the command line. */ | |
45 | #if defined __vxworks && defined __PIC__ | |
46 | #define NO_FPSCR_VALUES | |
47 | #endif | |
48 | ||
87e19636 | 49 | #if ! __SH5__ |
a85a2db0 | 50 | #ifdef L_ashiftrt |
/* Arithmetic right shift of r4 by a constant count.  There is one entry
   point per count, GLOBAL(ashiftrt_r4_0) .. GLOBAL(ashiftrt_r4_32), and
   the value is shifted in place in r4.  Within each group an entry adds
   one single-bit shar and then falls through into the entry for the next
   lower count.  The instruction written after each rts executes in the
   delay slot of that rts.  */
16f1dae0 | 51 | .global GLOBAL(ashiftrt_r4_0) |
52 | .global GLOBAL(ashiftrt_r4_1) | |
53 | .global GLOBAL(ashiftrt_r4_2) | |
54 | .global GLOBAL(ashiftrt_r4_3) | |
55 | .global GLOBAL(ashiftrt_r4_4) | |
56 | .global GLOBAL(ashiftrt_r4_5) | |
57 | .global GLOBAL(ashiftrt_r4_6) | |
58 | .global GLOBAL(ashiftrt_r4_7) | |
59 | .global GLOBAL(ashiftrt_r4_8) | |
60 | .global GLOBAL(ashiftrt_r4_9) | |
61 | .global GLOBAL(ashiftrt_r4_10) | |
62 | .global GLOBAL(ashiftrt_r4_11) | |
63 | .global GLOBAL(ashiftrt_r4_12) | |
64 | .global GLOBAL(ashiftrt_r4_13) | |
65 | .global GLOBAL(ashiftrt_r4_14) | |
66 | .global GLOBAL(ashiftrt_r4_15) | |
67 | .global GLOBAL(ashiftrt_r4_16) | |
68 | .global GLOBAL(ashiftrt_r4_17) | |
69 | .global GLOBAL(ashiftrt_r4_18) | |
70 | .global GLOBAL(ashiftrt_r4_19) | |
71 | .global GLOBAL(ashiftrt_r4_20) | |
72 | .global GLOBAL(ashiftrt_r4_21) | |
73 | .global GLOBAL(ashiftrt_r4_22) | |
74 | .global GLOBAL(ashiftrt_r4_23) | |
75 | .global GLOBAL(ashiftrt_r4_24) | |
76 | .global GLOBAL(ashiftrt_r4_25) | |
77 | .global GLOBAL(ashiftrt_r4_26) | |
78 | .global GLOBAL(ashiftrt_r4_27) | |
79 | .global GLOBAL(ashiftrt_r4_28) | |
80 | .global GLOBAL(ashiftrt_r4_29) | |
81 | .global GLOBAL(ashiftrt_r4_30) | |
82 | .global GLOBAL(ashiftrt_r4_31) | |
83 | .global GLOBAL(ashiftrt_r4_32) | |
a85a2db0 | 84 | |
59312820 | 85 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) |
86 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) | |
87 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) | |
88 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) | |
89 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) | |
90 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) | |
91 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) | |
92 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) | |
93 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) | |
94 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) | |
95 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) | |
96 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) | |
97 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) | |
98 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) | |
99 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) | |
100 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) | |
101 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) | |
102 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) | |
103 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) | |
104 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) | |
105 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) | |
106 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) | |
107 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) | |
108 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) | |
109 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) | |
110 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) | |
111 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) | |
112 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) | |
113 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) | |
114 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) | |
115 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) | |
116 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) | |
117 | HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) | |
805e22b2 | 118 | |
30b0bb86 | 119 | .align 1 |
/* Counts 32 and 31 share one body: rotcl moves the sign bit of r4 into T,
   then subc r4,r4 (in the rts delay slot) leaves r4 = -T, i.e. 0 or -1.  */
16f1dae0 | 120 | GLOBAL(ashiftrt_r4_32): |
121 | GLOBAL(ashiftrt_r4_31): | |
30b0bb86 | 122 | rotcl r4 |
123 | rts | |
124 | subc r4,r4 | |
125 | ||
/* Counts 30..25: one shar per entry, falling through to the count-24 body.  */
16f1dae0 | 126 | GLOBAL(ashiftrt_r4_30): |
a85a2db0 | 127 | shar r4 |
16f1dae0 | 128 | GLOBAL(ashiftrt_r4_29): |
a85a2db0 | 129 | shar r4 |
16f1dae0 | 130 | GLOBAL(ashiftrt_r4_28): |
a85a2db0 | 131 | shar r4 |
16f1dae0 | 132 | GLOBAL(ashiftrt_r4_27): |
a85a2db0 | 133 | shar r4 |
16f1dae0 | 134 | GLOBAL(ashiftrt_r4_26): |
a85a2db0 | 135 | shar r4 |
16f1dae0 | 136 | GLOBAL(ashiftrt_r4_25): |
a85a2db0 | 137 | shar r4 |
/* Count 24: logical shift right by 16 + 8 bits, then exts.b sign-extends
   the byte that is left.  */
16f1dae0 | 138 | GLOBAL(ashiftrt_r4_24): |
30b0bb86 | 139 | shlr16 r4 |
140 | shlr8 r4 | |
141 | rts | |
142 | exts.b r4,r4 | |
143 | ||
/* Counts 23..17: one shar per entry, falling through to the count-16 body.  */
16f1dae0 | 144 | GLOBAL(ashiftrt_r4_23): |
a85a2db0 | 145 | shar r4 |
16f1dae0 | 146 | GLOBAL(ashiftrt_r4_22): |
a85a2db0 | 147 | shar r4 |
16f1dae0 | 148 | GLOBAL(ashiftrt_r4_21): |
a85a2db0 | 149 | shar r4 |
16f1dae0 | 150 | GLOBAL(ashiftrt_r4_20): |
a85a2db0 | 151 | shar r4 |
16f1dae0 | 152 | GLOBAL(ashiftrt_r4_19): |
a85a2db0 | 153 | shar r4 |
16f1dae0 | 154 | GLOBAL(ashiftrt_r4_18): |
a85a2db0 | 155 | shar r4 |
16f1dae0 | 156 | GLOBAL(ashiftrt_r4_17): |
a85a2db0 | 157 | shar r4 |
/* Count 16: logical shift right by 16 bits, then exts.w sign-extends the
   word that is left.  */
16f1dae0 | 158 | GLOBAL(ashiftrt_r4_16): |
30b0bb86 | 159 | shlr16 r4 |
160 | rts | |
161 | exts.w r4,r4 | |
162 | ||
/* Counts 15..2: one shar per entry, falling through to the count-1 body.  */
16f1dae0 | 163 | GLOBAL(ashiftrt_r4_15): |
a85a2db0 | 164 | shar r4 |
16f1dae0 | 165 | GLOBAL(ashiftrt_r4_14): |
a85a2db0 | 166 | shar r4 |
16f1dae0 | 167 | GLOBAL(ashiftrt_r4_13): |
a85a2db0 | 168 | shar r4 |
16f1dae0 | 169 | GLOBAL(ashiftrt_r4_12): |
a85a2db0 | 170 | shar r4 |
16f1dae0 | 171 | GLOBAL(ashiftrt_r4_11): |
a85a2db0 | 172 | shar r4 |
16f1dae0 | 173 | GLOBAL(ashiftrt_r4_10): |
a85a2db0 | 174 | shar r4 |
16f1dae0 | 175 | GLOBAL(ashiftrt_r4_9): |
a85a2db0 | 176 | shar r4 |
16f1dae0 | 177 | GLOBAL(ashiftrt_r4_8): |
a85a2db0 | 178 | shar r4 |
16f1dae0 | 179 | GLOBAL(ashiftrt_r4_7): |
a85a2db0 | 180 | shar r4 |
16f1dae0 | 181 | GLOBAL(ashiftrt_r4_6): |
a85a2db0 | 182 | shar r4 |
16f1dae0 | 183 | GLOBAL(ashiftrt_r4_5): |
a85a2db0 | 184 | shar r4 |
16f1dae0 | 185 | GLOBAL(ashiftrt_r4_4): |
a85a2db0 | 186 | shar r4 |
16f1dae0 | 187 | GLOBAL(ashiftrt_r4_3): |
a85a2db0 | 188 | shar r4 |
16f1dae0 | 189 | GLOBAL(ashiftrt_r4_2): |
a85a2db0 | 190 | shar r4 |
/* Count 1: the last shar runs in the delay slot of the rts.  */
16f1dae0 | 191 | GLOBAL(ashiftrt_r4_1): |
a85a2db0 | 192 | rts |
193 | shar r4 | |
194 | ||
/* Count 0: return with r4 unchanged.  */
16f1dae0 | 195 | GLOBAL(ashiftrt_r4_0): |
a85a2db0 | 196 | rts |
627d08ca | 197 | nop |
805e22b2 | 198 | |
199 | ENDFUNC(GLOBAL(ashiftrt_r4_0)) | |
200 | ENDFUNC(GLOBAL(ashiftrt_r4_1)) | |
201 | ENDFUNC(GLOBAL(ashiftrt_r4_2)) | |
202 | ENDFUNC(GLOBAL(ashiftrt_r4_3)) | |
203 | ENDFUNC(GLOBAL(ashiftrt_r4_4)) | |
204 | ENDFUNC(GLOBAL(ashiftrt_r4_5)) | |
205 | ENDFUNC(GLOBAL(ashiftrt_r4_6)) | |
206 | ENDFUNC(GLOBAL(ashiftrt_r4_7)) | |
207 | ENDFUNC(GLOBAL(ashiftrt_r4_8)) | |
208 | ENDFUNC(GLOBAL(ashiftrt_r4_9)) | |
209 | ENDFUNC(GLOBAL(ashiftrt_r4_10)) | |
210 | ENDFUNC(GLOBAL(ashiftrt_r4_11)) | |
211 | ENDFUNC(GLOBAL(ashiftrt_r4_12)) | |
212 | ENDFUNC(GLOBAL(ashiftrt_r4_13)) | |
213 | ENDFUNC(GLOBAL(ashiftrt_r4_14)) | |
214 | ENDFUNC(GLOBAL(ashiftrt_r4_15)) | |
215 | ENDFUNC(GLOBAL(ashiftrt_r4_16)) | |
216 | ENDFUNC(GLOBAL(ashiftrt_r4_17)) | |
217 | ENDFUNC(GLOBAL(ashiftrt_r4_18)) | |
218 | ENDFUNC(GLOBAL(ashiftrt_r4_19)) | |
219 | ENDFUNC(GLOBAL(ashiftrt_r4_20)) | |
220 | ENDFUNC(GLOBAL(ashiftrt_r4_21)) | |
221 | ENDFUNC(GLOBAL(ashiftrt_r4_22)) | |
222 | ENDFUNC(GLOBAL(ashiftrt_r4_23)) | |
223 | ENDFUNC(GLOBAL(ashiftrt_r4_24)) | |
224 | ENDFUNC(GLOBAL(ashiftrt_r4_25)) | |
225 | ENDFUNC(GLOBAL(ashiftrt_r4_26)) | |
226 | ENDFUNC(GLOBAL(ashiftrt_r4_27)) | |
227 | ENDFUNC(GLOBAL(ashiftrt_r4_28)) | |
228 | ENDFUNC(GLOBAL(ashiftrt_r4_29)) | |
229 | ENDFUNC(GLOBAL(ashiftrt_r4_30)) | |
230 | ENDFUNC(GLOBAL(ashiftrt_r4_31)) | |
231 | ENDFUNC(GLOBAL(ashiftrt_r4_32)) | |
a85a2db0 | 232 | #endif |
233 | ||
30b0bb86 | 234 | #ifdef L_ashiftrt_n |
235 | ||
236 | ! | |
16f1dae0 | 237 | ! GLOBAL(ashrsi3) |
30b0bb86 | 238 | ! |
239 | ! Entry: | |
240 | ! | |
241 | ! r4: Value to shift | |
6e7c6395 | 242 | ! r5: Shift count |
30b0bb86 | 243 | ! |
244 | ! Exit: | |
245 | ! | |
246 | ! r0: Result | |
247 | ! | |
248 | ! Destroys: | |
249 | ! | |
6e7c6395 | 250 | ! T bit, r5 |
30b0bb86 | 251 | ! |
252 | ||
/* Variable-count arithmetic right shift.  The count in r5 is reduced to
   0..31 and used to index a table of byte offsets; the selected handler
   then shifts r0, which receives the value from r4 in the delay slot of
   the dispatching branch.  */
16f1dae0 | 253 | .global GLOBAL(ashrsi3) |
59312820 | 254 | HIDDEN_FUNC(GLOBAL(ashrsi3)) |
30b0bb86 | 255 | .align 2 |
16f1dae0 | 256 | GLOBAL(ashrsi3): |
/* r5 &= 31 (r0 is only a temporary for the mask here).  */
30b0bb86 | 257 | mov #31,r0 |
d73f1571 | 258 | and r0,r5 |
/* r0 = address of the offset table; r5 = table[r5], sign-extended by mov.b.  */
259 | mova LOCAL(ashrsi3_table),r0 | |
0f04c0de | 260 | mov.b @(r0,r5),r5 |
/* SH1 path: add the offset to the table address and jump there.  Other CPUs
   use braf with the offset alone.  NOTE(review): braf is PC-relative, so
   this relies on the table starting right after the delay slot -- confirm
   against the SH instruction reference before moving any instruction.  */
d73f1571 | 261 | #ifdef __sh1__ |
262 | add r5,r0 | |
30b0bb86 | 263 | jmp @r0 |
d73f1571 | 264 | #else |
265 | braf r5 | |
266 | #endif | |
/* Delay slot of the jmp/braf: r0 = value to shift.  */
30b0bb86 | 267 | mov r4,r0 |
268 | ||
/* One byte per shift count: distance of the handler from the table start.  */
d73f1571 | 269 | .align 2 |
270 | LOCAL(ashrsi3_table): | |
271 | .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) | |
272 | .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) | |
273 | .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) | |
274 | .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) | |
275 | .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) | |
276 | .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) | |
277 | .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) | |
278 | .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) | |
279 | .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) | |
280 | .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) | |
281 | .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) | |
282 | .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) | |
283 | .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) | |
284 | .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) | |
285 | .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) | |
286 | .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) | |
287 | .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) | |
288 | .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) | |
289 | .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) | |
290 | .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) | |
291 | .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) | |
292 | .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) | |
293 | .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) | |
294 | .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) | |
295 | .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) | |
296 | .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) | |
297 | .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) | |
298 | .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) | |
299 | .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) | |
300 | .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) | |
301 | .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) | |
302 | .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) | |
303 | ||
/* Count 31: rotcl moves the sign bit into T, then subc r0,r0 (in the rts
   delay slot) leaves r0 = -T, i.e. 0 or -1.  */
304 | LOCAL(ashrsi3_31): | |
30b0bb86 | 305 | rotcl r0 |
306 | rts | |
307 | subc r0,r0 | |
308 | ||
/* Counts 30..25: one shar per entry, falling through to the count-24 body.  */
d73f1571 | 309 | LOCAL(ashrsi3_30): |
30b0bb86 | 310 | shar r0 |
d73f1571 | 311 | LOCAL(ashrsi3_29): |
30b0bb86 | 312 | shar r0 |
d73f1571 | 313 | LOCAL(ashrsi3_28): |
30b0bb86 | 314 | shar r0 |
d73f1571 | 315 | LOCAL(ashrsi3_27): |
30b0bb86 | 316 | shar r0 |
d73f1571 | 317 | LOCAL(ashrsi3_26): |
30b0bb86 | 318 | shar r0 |
d73f1571 | 319 | LOCAL(ashrsi3_25): |
30b0bb86 | 320 | shar r0 |
/* Count 24: logical shift right by 16 + 8 bits, then sign-extend the byte.  */
d73f1571 | 321 | LOCAL(ashrsi3_24): |
30b0bb86 | 322 | shlr16 r0 |
323 | shlr8 r0 | |
324 | rts | |
325 | exts.b r0,r0 | |
326 | ||
/* Counts 23..17: one shar per entry, falling through to the count-16 body.  */
d73f1571 | 327 | LOCAL(ashrsi3_23): |
30b0bb86 | 328 | shar r0 |
d73f1571 | 329 | LOCAL(ashrsi3_22): |
30b0bb86 | 330 | shar r0 |
d73f1571 | 331 | LOCAL(ashrsi3_21): |
30b0bb86 | 332 | shar r0 |
d73f1571 | 333 | LOCAL(ashrsi3_20): |
30b0bb86 | 334 | shar r0 |
d73f1571 | 335 | LOCAL(ashrsi3_19): |
30b0bb86 | 336 | shar r0 |
d73f1571 | 337 | LOCAL(ashrsi3_18): |
30b0bb86 | 338 | shar r0 |
d73f1571 | 339 | LOCAL(ashrsi3_17): |
30b0bb86 | 340 | shar r0 |
/* Count 16: logical shift right by 16 bits, then sign-extend the word.  */
d73f1571 | 341 | LOCAL(ashrsi3_16): |
30b0bb86 | 342 | shlr16 r0 |
343 | rts | |
344 | exts.w r0,r0 | |
345 | ||
/* Counts 15..2: one shar per entry, falling through to the count-1 body.  */
d73f1571 | 346 | LOCAL(ashrsi3_15): |
30b0bb86 | 347 | shar r0 |
d73f1571 | 348 | LOCAL(ashrsi3_14): |
30b0bb86 | 349 | shar r0 |
d73f1571 | 350 | LOCAL(ashrsi3_13): |
30b0bb86 | 351 | shar r0 |
d73f1571 | 352 | LOCAL(ashrsi3_12): |
30b0bb86 | 353 | shar r0 |
d73f1571 | 354 | LOCAL(ashrsi3_11): |
30b0bb86 | 355 | shar r0 |
d73f1571 | 356 | LOCAL(ashrsi3_10): |
30b0bb86 | 357 | shar r0 |
d73f1571 | 358 | LOCAL(ashrsi3_9): |
30b0bb86 | 359 | shar r0 |
d73f1571 | 360 | LOCAL(ashrsi3_8): |
30b0bb86 | 361 | shar r0 |
d73f1571 | 362 | LOCAL(ashrsi3_7): |
30b0bb86 | 363 | shar r0 |
d73f1571 | 364 | LOCAL(ashrsi3_6): |
30b0bb86 | 365 | shar r0 |
d73f1571 | 366 | LOCAL(ashrsi3_5): |
30b0bb86 | 367 | shar r0 |
d73f1571 | 368 | LOCAL(ashrsi3_4): |
30b0bb86 | 369 | shar r0 |
d73f1571 | 370 | LOCAL(ashrsi3_3): |
30b0bb86 | 371 | shar r0 |
d73f1571 | 372 | LOCAL(ashrsi3_2): |
30b0bb86 | 373 | shar r0 |
/* Count 1: the last shar runs in the delay slot of the rts.  */
d73f1571 | 374 | LOCAL(ashrsi3_1): |
30b0bb86 | 375 | rts |
376 | shar r0 | |
377 | ||
/* Count 0: return the value unchanged.  */
d73f1571 | 378 | LOCAL(ashrsi3_0): |
30b0bb86 | 379 | rts |
380 | nop | |
381 | ||
805e22b2 | 382 | ENDFUNC(GLOBAL(ashrsi3)) |
30b0bb86 | 383 | #endif |
384 | ||
385 | #ifdef L_ashiftlt | |
386 | ||
387 | ! | |
16f1dae0 | 388 | ! GLOBAL(ashlsi3) |
6e7c6395 | 389 | ! (For compatibility with older binaries, not used by compiler) |
30b0bb86 | 390 | ! |
391 | ! Entry: | |
6e7c6395 | 392 | ! r4: Value to shift |
393 | ! r5: Shift count | |
30b0bb86 | 394 | ! |
395 | ! Exit: | |
6e7c6395 | 396 | ! r0: Result |
30b0bb86 | 397 | ! |
398 | ! Destroys: | |
6e7c6395 | 399 | ! T bit |
400 | ! | |
30b0bb86 | 401 | ! |
6e7c6395 | 402 | ! GLOBAL(ashlsi3_r0) |
30b0bb86 | 403 | ! |
6e7c6395 | 404 | ! Entry: |
405 | ! r4: Value to shift | |
406 | ! r0: Shift count | |
407 | ! | |
408 | ! Exit: | |
409 | ! r0: Result | |
410 | ! | |
411 | ! Destroys: | |
412 | ! T bit | |
413 | ||
/* Two entry points share this code.  GLOBAL(ashlsi3) copies the count from
   r5 into r0 and continues into GLOBAL(ashlsi3_r0), which expects the
   count in r0.  The count is masked to 0..31 and multiplied by four to
   index LOCAL(ashlsi3_table), whose entries are two instructions each.
   The value from r4 is copied to r0 in the delay slot of the dispatching
   branch, and the result is returned in r0.  */
16f1dae0 | 414 | .global GLOBAL(ashlsi3) |
6e7c6395 | 415 | .global GLOBAL(ashlsi3_r0) |
59312820 | 416 | HIDDEN_FUNC(GLOBAL(ashlsi3)) |
6e7c6395 | 417 | HIDDEN_FUNC(GLOBAL(ashlsi3_r0)) |
16f1dae0 | 418 | GLOBAL(ashlsi3): |
6e7c6395 | 419 | mov r5,r0 |
420 | .align 2 | |
421 | GLOBAL(ashlsi3_r0): | |
422 | ||
/* SH1 path: r4 is pushed and borrowed to hold the scaled index while mova
   loads the table address into r0; r4 is restored before the jmp.  */
d73f1571 | 423 | #ifdef __sh1__ |
6e7c6395 | 424 | and #31,r0 |
425 | shll2 r0 | |
426 | mov.l r4,@-r15 | |
427 | mov r0,r4 | |
428 | mova LOCAL(ashlsi3_table),r0 | |
429 | add r4,r0 | |
430 | mov.l @r15+,r4 | |
30b0bb86 | 431 | jmp @r0 |
6e7c6395 | 432 | mov r4,r0 |
433 | .align 2 | |
/* Other CPUs: braf branches PC-relative by the scaled index.
   NOTE(review): this relies on the table starting right after the delay
   slot -- confirm before inserting anything between braf and the table.  */
d73f1571 | 434 | #else |
6e7c6395 | 435 | and #31,r0 |
436 | shll2 r0 | |
437 | braf r0 | |
30b0bb86 | 438 | mov r4,r0 |
6e7c6395 | 439 | #endif |
30b0bb86 | 440 | |
/* Jump table: entry N shifts left by N.  Each entry is a branch (rts or
   bra) plus the shift in its delay slot; entries that need further shifts
   branch to a shared tail below.  Entry 31 is the last one and is three
   instructions long.  */
d73f1571 | 441 | LOCAL(ashlsi3_table): |
6e7c6395 | 442 | rts // << 0 |
443 | nop | |
444 | LOCAL(ashlsi_1): | |
445 | rts // << 1 | |
446 | shll r0 | |
447 | LOCAL(ashlsi_2): // << 2 | |
30b0bb86 | 448 | rts |
449 | shll2 r0 | |
6e7c6395 | 450 | bra LOCAL(ashlsi_1) // << 3 |
30b0bb86 | 451 | shll2 r0 |
6e7c6395 | 452 | bra LOCAL(ashlsi_2) // << 4 |
30b0bb86 | 453 | shll2 r0 |
6e7c6395 | 454 | bra LOCAL(ashlsi_5) // << 5 |
30b0bb86 | 455 | shll r0 |
6e7c6395 | 456 | bra LOCAL(ashlsi_6) // << 6 |
30b0bb86 | 457 | shll2 r0 |
6e7c6395 | 458 | bra LOCAL(ashlsi_7) // << 7 |
459 | shll r0 | |
460 | LOCAL(ashlsi_8): // << 8 | |
30b0bb86 | 461 | rts |
462 | shll8 r0 | |
6e7c6395 | 463 | bra LOCAL(ashlsi_8) // << 9 |
464 | shll r0 | |
465 | bra LOCAL(ashlsi_8) // << 10 | |
30b0bb86 | 466 | shll2 r0 |
6e7c6395 | 467 | bra LOCAL(ashlsi_11) // << 11 |
468 | shll r0 | |
469 | bra LOCAL(ashlsi_12) // << 12 | |
30b0bb86 | 470 | shll2 r0 |
6e7c6395 | 471 | bra LOCAL(ashlsi_13) // << 13 |
472 | shll r0 | |
473 | bra LOCAL(ashlsi_14) // << 14 | |
30b0bb86 | 474 | shll8 r0 |
6e7c6395 | 475 | bra LOCAL(ashlsi_15) // << 15 |
476 | shll8 r0 | |
477 | LOCAL(ashlsi_16): // << 16 | |
30b0bb86 | 478 | rts |
6e7c6395 | 479 | shll16 r0 |
480 | bra LOCAL(ashlsi_16) // << 17 | |
30b0bb86 | 481 | shll r0 |
6e7c6395 | 482 | bra LOCAL(ashlsi_16) // << 18 |
30b0bb86 | 483 | shll2 r0 |
6e7c6395 | 484 | bra LOCAL(ashlsi_19) // << 19 |
485 | shll r0 | |
486 | bra LOCAL(ashlsi_20) // << 20 | |
30b0bb86 | 487 | shll2 r0 |
6e7c6395 | 488 | bra LOCAL(ashlsi_21) // << 21 |
489 | shll r0 | |
490 | bra LOCAL(ashlsi_22) // << 22 | |
30b0bb86 | 491 | shll16 r0 |
6e7c6395 | 492 | bra LOCAL(ashlsi_23) // << 23 |
493 | shll16 r0 | |
494 | bra LOCAL(ashlsi_16) // << 24 | |
495 | shll8 r0 | |
496 | bra LOCAL(ashlsi_25) // << 25 | |
497 | shll r0 | |
498 | bra LOCAL(ashlsi_26) // << 26 | |
30b0bb86 | 499 | shll2 r0 |
6e7c6395 | 500 | bra LOCAL(ashlsi_27) // << 27 |
501 | shll r0 | |
502 | bra LOCAL(ashlsi_28) // << 28 | |
30b0bb86 | 503 | shll2 r0 |
6e7c6395 | 504 | bra LOCAL(ashlsi_29) // << 29 |
505 | shll16 r0 | |
506 | bra LOCAL(ashlsi_30) // << 30 | |
30b0bb86 | 507 | shll16 r0 |
/* Count 31: and #1 keeps only bit 0; rotr (in the rts delay slot) moves it
   to bit 31.  */
6e7c6395 | 508 | and #1,r0 // << 31 |
30b0bb86 | 509 | rts |
6e7c6395 | 510 | rotr r0 |
30b0bb86 | 511 | |
/* Shared tails.  Where an rts is directly followed by the next label, the
   shift at that label also serves as the delay-slot instruction of the
   rts, and the shift totals of those paths include it.  Do not insert
   anything between such an rts and the following shift.  */
6e7c6395 | 512 | LOCAL(ashlsi_7): |
30b0bb86 | 513 | shll2 r0 |
6e7c6395 | 514 | LOCAL(ashlsi_5): |
515 | LOCAL(ashlsi_6): | |
30b0bb86 | 516 | shll2 r0 |
30b0bb86 | 517 | rts |
6e7c6395 | 518 | LOCAL(ashlsi_13): |
30b0bb86 | 519 | shll2 r0 |
6e7c6395 | 520 | LOCAL(ashlsi_12): |
521 | LOCAL(ashlsi_11): | |
522 | shll8 r0 | |
523 | rts | |
524 | LOCAL(ashlsi_21): | |
30b0bb86 | 525 | shll2 r0 |
6e7c6395 | 526 | LOCAL(ashlsi_20): |
527 | LOCAL(ashlsi_19): | |
528 | shll16 r0 | |
529 | rts | |
530 | LOCAL(ashlsi_28): | |
531 | LOCAL(ashlsi_27): | |
30b0bb86 | 532 | shll2 r0 |
6e7c6395 | 533 | LOCAL(ashlsi_26): |
534 | LOCAL(ashlsi_25): | |
30b0bb86 | 535 | shll16 r0 |
6e7c6395 | 536 | rts |
30b0bb86 | 537 | shll8 r0 |
6e7c6395 | 538 | |
/* Counts 14 and 22: the table entry already shifted left by 8 (or 16);
   step back by 2 with shlr2, then shift left by another 8.  */
539 | LOCAL(ashlsi_22): | |
540 | LOCAL(ashlsi_14): | |
541 | shlr2 r0 | |
30b0bb86 | 542 | rts |
6e7c6395 | 543 | shll8 r0 |
30b0bb86 | 544 | |
/* Counts 15 and 23: as above, but stepping back by 1.  */
6e7c6395 | 545 | LOCAL(ashlsi_23): |
546 | LOCAL(ashlsi_15): | |
547 | shlr r0 | |
30b0bb86 | 548 | rts |
6e7c6395 | 549 | shll8 r0 |
550 | ||
/* Counts 29 and 30: after the table entry's shll16, step back by 3 (or 2)
   and shift left by another 16.  */
551 | LOCAL(ashlsi_29): | |
552 | shlr r0 | |
553 | LOCAL(ashlsi_30): | |
554 | shlr2 r0 | |
555 | rts | |
556 | shll16 r0 | |
30b0bb86 | 557 | |
805e22b2 | 558 | ENDFUNC(GLOBAL(ashlsi3)) |
6e7c6395 | 559 | ENDFUNC(GLOBAL(ashlsi3_r0)) |
30b0bb86 | 560 | #endif |
561 | ||
562 | #ifdef L_lshiftrt | |
563 | ||
564 | ! | |
16f1dae0 | 565 | ! GLOBAL(lshrsi3) |
6e7c6395 | 566 | ! (For compatibility with older binaries, not used by compiler) |
30b0bb86 | 567 | ! |
568 | ! Entry: | |
6e7c6395 | 569 | ! r4: Value to shift |
570 | ! r5: Shift count | |
30b0bb86 | 571 | ! |
572 | ! Exit: | |
6e7c6395 | 573 | ! r0: Result |
30b0bb86 | 574 | ! |
575 | ! Destroys: | |
6e7c6395 | 576 | ! T bit |
577 | ! | |
578 | ! | |
579 | ! GLOBAL(lshrsi3_r0) | |
30b0bb86 | 580 | ! |
6e7c6395 | 581 | ! Entry: |
582 | ! r4: Value to shift | |
583 | ! r0: Shift count | |
584 | ! | |
585 | ! Exit: | |
586 | ! r0: Result | |
30b0bb86 | 587 | ! |
6e7c6395 | 588 | ! Destroys: |
589 | ! T bit | |
590 | ||
/* Two entry points share this code.  GLOBAL(lshrsi3) copies the count from
   r5 into r0 and continues into GLOBAL(lshrsi3_r0), which expects the
   count in r0.  The count is masked to 0..31 and multiplied by four to
   index LOCAL(lshrsi3_table), whose entries are two instructions each.
   The value from r4 is copied to r0 in the delay slot of the dispatching
   branch, and the result is returned in r0.  */
16f1dae0 | 591 | .global GLOBAL(lshrsi3) |
6e7c6395 | 592 | .global GLOBAL(lshrsi3_r0) |
59312820 | 593 | HIDDEN_FUNC(GLOBAL(lshrsi3)) |
6e7c6395 | 594 | HIDDEN_FUNC(GLOBAL(lshrsi3_r0)) |
16f1dae0 | 595 | GLOBAL(lshrsi3): |
6e7c6395 | 596 | mov r5,r0 |
597 | .align 2 | |
598 | GLOBAL(lshrsi3_r0): | |
599 | ||
/* SH1 path: r4 is pushed and borrowed to hold the scaled index while mova
   loads the table address into r0; r4 is restored before the jmp.  */
d73f1571 | 600 | #ifdef __sh1__ |
6e7c6395 | 601 | and #31,r0 |
602 | shll2 r0 | |
603 | mov.l r4,@-r15 | |
604 | mov r0,r4 | |
605 | mova LOCAL(lshrsi3_table),r0 | |
606 | add r4,r0 | |
607 | mov.l @r15+,r4 | |
30b0bb86 | 608 | jmp @r0 |
6e7c6395 | 609 | mov r4,r0 |
610 | .align 2 | |
/* Other CPUs: braf branches PC-relative by the scaled index.
   NOTE(review): this relies on the table starting right after the delay
   slot -- confirm before inserting anything between braf and the table.  */
d73f1571 | 611 | #else |
6e7c6395 | 612 | and #31,r0 |
613 | shll2 r0 | |
614 | braf r0 | |
30b0bb86 | 615 | mov r4,r0 |
6e7c6395 | 616 | #endif |
/* Jump table: entry N shifts right (logically) by N.  Each entry is a
   branch (rts or bra) plus the shift in its delay slot; entries that need
   further shifts branch to a shared tail below.  Entry 31 is the last one
   and is three instructions long.  */
d73f1571 | 617 | LOCAL(lshrsi3_table): |
6e7c6395 | 618 | rts // >> 0 |
619 | nop | |
620 | LOCAL(lshrsi_1): // >> 1 | |
621 | rts | |
622 | shlr r0 | |
623 | LOCAL(lshrsi_2): // >> 2 | |
30b0bb86 | 624 | rts |
625 | shlr2 r0 | |
6e7c6395 | 626 | bra LOCAL(lshrsi_1) // >> 3 |
30b0bb86 | 627 | shlr2 r0 |
6e7c6395 | 628 | bra LOCAL(lshrsi_2) // >> 4 |
30b0bb86 | 629 | shlr2 r0 |
6e7c6395 | 630 | bra LOCAL(lshrsi_5) // >> 5 |
30b0bb86 | 631 | shlr r0 |
6e7c6395 | 632 | bra LOCAL(lshrsi_6) // >> 6 |
30b0bb86 | 633 | shlr2 r0 |
6e7c6395 | 634 | bra LOCAL(lshrsi_7) // >> 7 |
635 | shlr r0 | |
636 | LOCAL(lshrsi_8): // >> 8 | |
30b0bb86 | 637 | rts |
638 | shlr8 r0 | |
6e7c6395 | 639 | bra LOCAL(lshrsi_8) // >> 9 |
640 | shlr r0 | |
641 | bra LOCAL(lshrsi_8) // >> 10 | |
30b0bb86 | 642 | shlr2 r0 |
6e7c6395 | 643 | bra LOCAL(lshrsi_11) // >> 11 |
644 | shlr r0 | |
645 | bra LOCAL(lshrsi_12) // >> 12 | |
30b0bb86 | 646 | shlr2 r0 |
6e7c6395 | 647 | bra LOCAL(lshrsi_13) // >> 13 |
648 | shlr r0 | |
649 | bra LOCAL(lshrsi_14) // >> 14 | |
650 | shlr8 r0 | |
651 | bra LOCAL(lshrsi_15) // >> 15 | |
30b0bb86 | 652 | shlr8 r0 |
6e7c6395 | 653 | LOCAL(lshrsi_16): // >> 16 |
30b0bb86 | 654 | rts |
6e7c6395 | 655 | shlr16 r0 |
656 | bra LOCAL(lshrsi_16) // >> 17 | |
30b0bb86 | 657 | shlr r0 |
6e7c6395 | 658 | bra LOCAL(lshrsi_16) // >> 18 |
30b0bb86 | 659 | shlr2 r0 |
6e7c6395 | 660 | bra LOCAL(lshrsi_19) // >> 19 |
661 | shlr r0 | |
662 | bra LOCAL(lshrsi_20) // >> 20 | |
30b0bb86 | 663 | shlr2 r0 |
6e7c6395 | 664 | bra LOCAL(lshrsi_21) // >> 21 |
665 | shlr r0 | |
666 | bra LOCAL(lshrsi_22) // >> 22 | |
30b0bb86 | 667 | shlr16 r0 |
6e7c6395 | 668 | bra LOCAL(lshrsi_23) // >> 23 |
669 | shlr16 r0 | |
670 | bra LOCAL(lshrsi_16) // >> 24 | |
671 | shlr8 r0 | |
672 | bra LOCAL(lshrsi_25) // >> 25 | |
673 | shlr r0 | |
674 | bra LOCAL(lshrsi_26) // >> 26 | |
30b0bb86 | 675 | shlr2 r0 |
6e7c6395 | 676 | bra LOCAL(lshrsi_27) // >> 27 |
677 | shlr r0 | |
678 | bra LOCAL(lshrsi_28) // >> 28 | |
30b0bb86 | 679 | shlr2 r0 |
6e7c6395 | 680 | bra LOCAL(lshrsi_29) // >> 29 |
681 | shlr16 r0 | |
682 | bra LOCAL(lshrsi_30) // >> 30 | |
30b0bb86 | 683 | shlr16 r0 |
/* Count 31: shll shifts bit 31 out into T; movt (in the rts delay slot)
   copies T into r0, so the result is 0 or 1.  */
6e7c6395 | 684 | shll r0 // >> 31 |
30b0bb86 | 685 | rts |
6e7c6395 | 686 | movt r0 |
30b0bb86 | 687 | |
/* Shared tails.  Where an rts is directly followed by the next label, the
   shift at that label also serves as the delay-slot instruction of the
   rts, and the shift totals of those paths include it.  Do not insert
   anything between such an rts and the following shift.  */
6e7c6395 | 688 | LOCAL(lshrsi_7): |
30b0bb86 | 689 | shlr2 r0 |
6e7c6395 | 690 | LOCAL(lshrsi_5): |
691 | LOCAL(lshrsi_6): | |
30b0bb86 | 692 | shlr2 r0 |
30b0bb86 | 693 | rts |
6e7c6395 | 694 | LOCAL(lshrsi_13): |
30b0bb86 | 695 | shlr2 r0 |
6e7c6395 | 696 | LOCAL(lshrsi_12): |
697 | LOCAL(lshrsi_11): | |
698 | shlr8 r0 | |
699 | rts | |
700 | LOCAL(lshrsi_21): | |
30b0bb86 | 701 | shlr2 r0 |
6e7c6395 | 702 | LOCAL(lshrsi_20): |
703 | LOCAL(lshrsi_19): | |
704 | shlr16 r0 | |
705 | rts | |
706 | LOCAL(lshrsi_28): | |
707 | LOCAL(lshrsi_27): | |
30b0bb86 | 708 | shlr2 r0 |
6e7c6395 | 709 | LOCAL(lshrsi_26): |
710 | LOCAL(lshrsi_25): | |
30b0bb86 | 711 | shlr16 r0 |
6e7c6395 | 712 | rts |
30b0bb86 | 713 | shlr8 r0 |
6e7c6395 | 714 | |
/* Counts 14 and 22: the table entry already shifted right by 8 (or 16);
   step back by 2 with shll2, then shift right by another 8.  */
715 | LOCAL(lshrsi_22): | |
716 | LOCAL(lshrsi_14): | |
717 | shll2 r0 | |
30b0bb86 | 718 | rts |
6e7c6395 | 719 | shlr8 r0 |
30b0bb86 | 720 | |
/* Counts 15 and 23: as above, but stepping back by 1.  */
6e7c6395 | 721 | LOCAL(lshrsi_23): |
722 | LOCAL(lshrsi_15): | |
723 | shll r0 | |
30b0bb86 | 724 | rts |
6e7c6395 | 725 | shlr8 r0 |
726 | ||
/* Counts 29 and 30: after the table entry's shlr16, step back by 3 (or 2)
   and shift right by another 16.  */
727 | LOCAL(lshrsi_29): | |
728 | shll r0 | |
729 | LOCAL(lshrsi_30): | |
730 | shll2 r0 | |
731 | rts | |
732 | shlr16 r0 | |
30b0bb86 | 733 | |
805e22b2 | 734 | ENDFUNC(GLOBAL(lshrsi3)) |
6e7c6395 | 735 | ENDFUNC(GLOBAL(lshrsi3_r0)) |
30b0bb86 | 736 | #endif |
737 | ||
008c057d | 738 | #ifdef L_movmem |
/* General block copy from @r5 to @r4, built on the fixed-size
   GLOBAL(movmemSI*) copy chain.  The chain is entered with bsr and
   re-entered with bt while PR still points at LOCAL(movmem_loop), so the
   rts that ends the chain comes back into the loop.  r0 is the copy
   scratch register; r4, r5 and r6 are modified.
   NOTE(review): r6 is scaled by four on entry and then counted down in
   steps of 64; its exact convention is set by the compiler-side caller
   (see the remark below about the byte count) -- confirm there.  */
a85a2db0 | 739 | .text |
59312820 | 740 | .balign 4 |
741 | .global GLOBAL(movmem) | |
742 | HIDDEN_FUNC(GLOBAL(movmem)) | |
743 | HIDDEN_ALIAS(movstr,movmem) | |
744 | /* This would be a lot simpler if r6 contained the byte count | |
745 | minus 64, and we wouldn't be called here for a byte count of 64. */ | |
746 | GLOBAL(movmem): | |
/* Save PR (the bsr below overwrites it) and scale r6 by four.  */
747 | sts.l pr,@-r15 | |
748 | shll2 r6 | |
/* Enter movmemSI52 past its first instruction; the load that instruction
   would have done is performed here in the bsr delay slot.  */
749 | bsr GLOBAL(movmemSI52+2) | |
750 | mov.l @(48,r5),r0 | |
751 | .balign 4 | |
/* Copy the longwords at offsets 60, 56 and 52, interleaved with the
   bookkeeping: r6 -= 64, then r4 and r5 advance by 64.  */
752 | LOCAL(movmem_loop): /* Reached with rts */ | |
753 | mov.l @(60,r5),r0 | |
754 | add #-64,r6 | |
755 | mov.l r0,@(60,r4) | |
756 | tst r6,r6 | |
757 | mov.l @(56,r5),r0 | |
758 | bt LOCAL(movmem_done) | |
759 | mov.l r0,@(56,r4) | |
760 | cmp/pl r6 | |
761 | mov.l @(52,r5),r0 | |
762 | add #64,r5 | |
763 | mov.l r0,@(52,r4) | |
764 | add #64,r4 | |
/* r6 > 0: copy offsets 48..0 of the next group through the chain, whose
   final rts returns to LOCAL(movmem_loop).  */
765 | bt GLOBAL(movmemSI52) | |
a85a2db0 | 766 | ! done all the large groups, do the remainder |
008c057d | 767 | ! jump to movmem+ |
/* r6 is negative here: jump into the copy chain at movmemSI4+4+r6.  The
   jmp delay slot is the lds.l at LOCAL(movmem_done), which restores PR so
   that the chain's rts returns to the original caller.  */
59312820 | 768 | mova GLOBAL(movmemSI4)+4,r0 |
a85a2db0 | 769 | add r6,r0 |
770 | jmp @r0 | |
/* r6 == 0: restore PR, finish the stores for offsets 56 and 52, return.  */
59312820 | 771 | LOCAL(movmem_done): ! share slot insn, works out aligned. |
772 | lds.l @r15+,pr | |
773 | mov.l r0,@(56,r4) | |
774 | mov.l @(52,r5),r0 | |
775 | rts | |
776 | mov.l r0,@(52,r4) | |
777 | .balign 4 | |
94ccf9fb | 778 | ! ??? We need aliases movstr* for movmem* for the older libraries. These |
779 | ! aliases will be removed at some point in the future. | |
/* Fall-through chain of fixed-size copies.  GLOBAL(movmemSINN) copies NN
   bytes from @r5 to @r4, one longword at a time from offset NN-4 down to
   offset 0, using r0 as scratch; r4 and r5 are not modified.  Each entry
   point is one load/store pair (two instructions) and continues into the
   next smaller size; GLOBAL(movmemSI4) ends the chain with rts and does
   its store in the delay slot.
   NOTE(review): HIDDEN_ALIAS is defined outside this section; presumably
   it provides the legacy movstr* names for the same entry points.  */
008c057d | 780 | .global GLOBAL(movmemSI64) |
59312820 | 781 | HIDDEN_FUNC(GLOBAL(movmemSI64)) |
782 | HIDDEN_ALIAS(movstrSI64,movmemSI64) | |
008c057d | 783 | GLOBAL(movmemSI64): |
a85a2db0 | 784 | mov.l @(60,r5),r0 |
785 | mov.l r0,@(60,r4) | |
008c057d | 786 | .global GLOBAL(movmemSI60) |
59312820 | 787 | HIDDEN_FUNC(GLOBAL(movmemSI60)) |
788 | HIDDEN_ALIAS(movstrSI60,movmemSI60) | |
008c057d | 789 | GLOBAL(movmemSI60): |
a85a2db0 | 790 | mov.l @(56,r5),r0 |
791 | mov.l r0,@(56,r4) | |
008c057d | 792 | .global GLOBAL(movmemSI56) |
59312820 | 793 | HIDDEN_FUNC(GLOBAL(movmemSI56)) |
794 | HIDDEN_ALIAS(movstrSI56,movmemSI56) | |
008c057d | 795 | GLOBAL(movmemSI56): |
a85a2db0 | 796 | mov.l @(52,r5),r0 |
797 | mov.l r0,@(52,r4) | |
008c057d | 798 | .global GLOBAL(movmemSI52) |
59312820 | 799 | HIDDEN_FUNC(GLOBAL(movmemSI52)) |
800 | HIDDEN_ALIAS(movstrSI52,movmemSI52) | |
/* GLOBAL(movmem) also enters here, both at this label and at +2 (past the
   load), so the load must stay the first instruction of this entry.  */
008c057d | 801 | GLOBAL(movmemSI52): |
a85a2db0 | 802 | mov.l @(48,r5),r0 |
803 | mov.l r0,@(48,r4) | |
008c057d | 804 | .global GLOBAL(movmemSI48) |
59312820 | 805 | HIDDEN_FUNC(GLOBAL(movmemSI48)) |
806 | HIDDEN_ALIAS(movstrSI48,movmemSI48) | |
008c057d | 807 | GLOBAL(movmemSI48): |
a85a2db0 | 808 | mov.l @(44,r5),r0 |
809 | mov.l r0,@(44,r4) | |
008c057d | 810 | .global GLOBAL(movmemSI44) |
59312820 | 811 | HIDDEN_FUNC(GLOBAL(movmemSI44)) |
812 | HIDDEN_ALIAS(movstrSI44,movmemSI44) | |
008c057d | 813 | GLOBAL(movmemSI44): |
a85a2db0 | 814 | mov.l @(40,r5),r0 |
815 | mov.l r0,@(40,r4) | |
008c057d | 816 | .global GLOBAL(movmemSI40) |
59312820 | 817 | HIDDEN_FUNC(GLOBAL(movmemSI40)) |
818 | HIDDEN_ALIAS(movstrSI40,movmemSI40) | |
008c057d | 819 | GLOBAL(movmemSI40): |
a85a2db0 | 820 | mov.l @(36,r5),r0 |
821 | mov.l r0,@(36,r4) | |
008c057d | 822 | .global GLOBAL(movmemSI36) |
59312820 | 823 | HIDDEN_FUNC(GLOBAL(movmemSI36)) |
824 | HIDDEN_ALIAS(movstrSI36,movmemSI36) | |
008c057d | 825 | GLOBAL(movmemSI36): |
a85a2db0 | 826 | mov.l @(32,r5),r0 |
827 | mov.l r0,@(32,r4) | |
008c057d | 828 | .global GLOBAL(movmemSI32) |
59312820 | 829 | HIDDEN_FUNC(GLOBAL(movmemSI32)) |
830 | HIDDEN_ALIAS(movstrSI32,movmemSI32) | |
008c057d | 831 | GLOBAL(movmemSI32): |
a85a2db0 | 832 | mov.l @(28,r5),r0 |
833 | mov.l r0,@(28,r4) | |
008c057d | 834 | .global GLOBAL(movmemSI28) |
59312820 | 835 | HIDDEN_FUNC(GLOBAL(movmemSI28)) |
836 | HIDDEN_ALIAS(movstrSI28,movmemSI28) | |
008c057d | 837 | GLOBAL(movmemSI28): |
a85a2db0 | 838 | mov.l @(24,r5),r0 |
839 | mov.l r0,@(24,r4) | |
008c057d | 840 | .global GLOBAL(movmemSI24) |
59312820 | 841 | HIDDEN_FUNC(GLOBAL(movmemSI24)) |
842 | HIDDEN_ALIAS(movstrSI24,movmemSI24) | |
008c057d | 843 | GLOBAL(movmemSI24): |
a85a2db0 | 844 | mov.l @(20,r5),r0 |
845 | mov.l r0,@(20,r4) | |
008c057d | 846 | .global GLOBAL(movmemSI20) |
59312820 | 847 | HIDDEN_FUNC(GLOBAL(movmemSI20)) |
848 | HIDDEN_ALIAS(movstrSI20,movmemSI20) | |
008c057d | 849 | GLOBAL(movmemSI20): |
a85a2db0 | 850 | mov.l @(16,r5),r0 |
851 | mov.l r0,@(16,r4) | |
008c057d | 852 | .global GLOBAL(movmemSI16) |
59312820 | 853 | HIDDEN_FUNC(GLOBAL(movmemSI16)) |
854 | HIDDEN_ALIAS(movstrSI16,movmemSI16) | |
008c057d | 855 | GLOBAL(movmemSI16): |
a85a2db0 | 856 | mov.l @(12,r5),r0 |
857 | mov.l r0,@(12,r4) | |
008c057d | 858 | .global GLOBAL(movmemSI12) |
59312820 | 859 | HIDDEN_FUNC(GLOBAL(movmemSI12)) |
860 | HIDDEN_ALIAS(movstrSI12,movmemSI12) | |
008c057d | 861 | GLOBAL(movmemSI12): |
a85a2db0 | 862 | mov.l @(8,r5),r0 |
863 | mov.l r0,@(8,r4) | |
008c057d | 864 | .global GLOBAL(movmemSI8) |
59312820 | 865 | HIDDEN_FUNC(GLOBAL(movmemSI8)) |
866 | HIDDEN_ALIAS(movstrSI8,movmemSI8) | |
008c057d | 867 | GLOBAL(movmemSI8): |
a85a2db0 | 868 | mov.l @(4,r5),r0 |
869 | mov.l r0,@(4,r4) | |
008c057d | 870 | .global GLOBAL(movmemSI4) |
59312820 | 871 | HIDDEN_FUNC(GLOBAL(movmemSI4)) |
872 | HIDDEN_ALIAS(movstrSI4,movmemSI4) | |
/* Last link: the store for offset 0 runs in the rts delay slot.  The
   remainder path of GLOBAL(movmem) computes its jump target relative to
   this label, so the two-instructions-per-longword layout above must be
   kept.  */
008c057d | 873 | GLOBAL(movmemSI4): |
a85a2db0 | 874 | mov.l @(0,r5),r0 |
a85a2db0 | 875 | rts |
59312820 | 876 | mov.l r0,@(0,r4) |
a85a2db0 | 877 | |
008c057d | 878 | ENDFUNC(GLOBAL(movmemSI64)) |
879 | ENDFUNC(GLOBAL(movmemSI60)) | |
880 | ENDFUNC(GLOBAL(movmemSI56)) | |
881 | ENDFUNC(GLOBAL(movmemSI52)) | |
882 | ENDFUNC(GLOBAL(movmemSI48)) | |
883 | ENDFUNC(GLOBAL(movmemSI44)) | |
884 | ENDFUNC(GLOBAL(movmemSI40)) | |
885 | ENDFUNC(GLOBAL(movmemSI36)) | |
886 | ENDFUNC(GLOBAL(movmemSI32)) | |
887 | ENDFUNC(GLOBAL(movmemSI28)) | |
888 | ENDFUNC(GLOBAL(movmemSI24)) | |
889 | ENDFUNC(GLOBAL(movmemSI20)) | |
890 | ENDFUNC(GLOBAL(movmemSI16)) | |
891 | ENDFUNC(GLOBAL(movmemSI12)) | |
892 | ENDFUNC(GLOBAL(movmemSI8)) | |
893 | ENDFUNC(GLOBAL(movmemSI4)) | |
59312820 | 894 | ENDFUNC(GLOBAL(movmem)) |
a85a2db0 | 895 | #endif |
896 | ||
008c057d | 897 | #ifdef L_movmem_i4 |
/* Copy routines that move longwords from @r5 to @r4 through r0-r3.
   GLOBAL(movmem_i4_even) and GLOBAL(movmem_i4_odd) share one loop and
   differ only in how they enter it; r6 is the loop counter, decremented
   by dt.  r4, r5 and r6 are modified.  GLOBAL(movmemSI12_i4) is a
   straight-line copy of three longwords that leaves r4 and r5 unchanged.
   NOTE(review): the exact meaning of the count in r6, and of "even" and
   "odd", is set by the compiler-side caller -- confirm there.  */
1b61190c | 898 | .text |
008c057d | 899 | .global GLOBAL(movmem_i4_even) |
900 | .global GLOBAL(movmem_i4_odd) | |
901 | .global GLOBAL(movmemSI12_i4) | |
1b61190c | 902 | |
59312820 | 903 | HIDDEN_FUNC(GLOBAL(movmem_i4_even)) |
904 | HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) | |
905 | HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) | |
805e22b2 | 906 | |
59312820 | 907 | HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) |
908 | HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) | |
909 | HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) | |
94ccf9fb | 910 | |
1b61190c | 911 | .p2align 5 |
/* Loop exit taken when the dt at the top of the loop reaches zero: store
   the two longwords already loaded into r0 and r1, then return.  */
008c057d | 912 | L_movmem_2mod4_end: |
1b61190c | 913 | mov.l r0,@(16,r4) |
914 | rts | |
915 | mov.l r1,@(20,r4) | |
916 | ||
917 | .p2align 2 | |
918 | ||
/* Even entry: preload r0 and r1 (the second load sits in the bra delay
   slot) and join the loop at its midpoint.  */
008c057d | 919 | GLOBAL(movmem_i4_even): |
805e22b2 | 920 | mov.l @r5+,r0 |
008c057d | 921 | bra L_movmem_start_even |
805e22b2 | 922 | mov.l @r5+,r1 |
923 | ||
/* Odd entry: r4 is biased by -4 so that the stores at offsets 4, 8 and 12
   land on the first three longwords of the destination.  */
008c057d | 924 | GLOBAL(movmem_i4_odd): |
1b61190c | 925 | mov.l @r5+,r1 |
926 | add #-4,r4 | |
927 | mov.l @r5+,r2 | |
928 | mov.l @r5+,r3 | |
929 | mov.l r1,@(4,r4) | |
930 | mov.l r2,@(8,r4) | |
931 | ||
/* Main loop.  bt/s and bf/s are delayed branches: the mov.l that follows
   each of them executes whether or not the branch is taken.  */
008c057d | 932 | L_movmem_loop: |
1b61190c | 933 | mov.l r3,@(12,r4) |
934 | dt r6 | |
935 | mov.l @r5+,r0 | |
008c057d | 936 | bt/s L_movmem_2mod4_end |
1b61190c | 937 | mov.l @r5+,r1 |
938 | add #16,r4 | |
008c057d | 939 | L_movmem_start_even: |
1b61190c | 940 | mov.l @r5+,r2 |
941 | mov.l @r5+,r3 | |
942 | mov.l r0,@r4 | |
943 | dt r6 | |
944 | mov.l r1,@(4,r4) | |
008c057d | 945 | bf/s L_movmem_loop |
1b61190c | 946 | mov.l r2,@(8,r4) |
947 | rts | |
948 | mov.l r3,@(12,r4) | |
949 | ||
008c057d | 950 | ENDFUNC(GLOBAL(movmem_i4_even)) |
951 | ENDFUNC(GLOBAL(movmem_i4_odd)) | |
1b61190c | 952 | |
953 | .p2align 4 | |
/* Copy exactly three longwords (12 bytes) using r0-r2; the last store runs
   in the rts delay slot.  */
008c057d | 954 | GLOBAL(movmemSI12_i4): |
1b61190c | 955 | mov.l @r5,r0 |
956 | mov.l @(4,r5),r1 | |
957 | mov.l @(8,r5),r2 | |
958 | mov.l r0,@r4 | |
959 | mov.l r1,@(4,r4) | |
960 | rts | |
961 | mov.l r2,@(8,r4) | |
805e22b2 | 962 | |
008c057d | 963 | ENDFUNC(GLOBAL(movmemSI12_i4)) |
1b61190c | 964 | #endif |
965 | ||
a85a2db0 | 966 | #ifdef L_mulsi3 |
967 | ||
968 | ||
16f1dae0 | 969 | .global GLOBAL(mulsi3) |
59312820 | 970 | HIDDEN_FUNC(GLOBAL(mulsi3)) |
a85a2db0 | 971 | |
972 | ! r4 = aabb | |
973 | ! r5 = ccdd | |
974 | ! r0 = aabb*ccdd via partial products | |
975 | ! | |
976 | ! if aa == 0 and cc == 0 | |
977 | ! r0 = bb*dd | |
978 | ! | |
979 | ! else | |
980 | ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536) | |
981 | ! | |
982 | ||
16f1dae0 | 983 | GLOBAL(mulsi3): /* r0 = low 32 bits of r4 * r5, built from 16x16 mulu.w partial products; uses r1-r3 and macl. */ |
fb062b27 | 984 | mulu.w r4,r5 ! multiply the lsws macl=bb*dd |
a85a2db0 | 985 | mov r5,r3 ! r3 = ccdd |
986 | swap.w r4,r2 ! r2 = bbaa | |
987 | xtrct r2,r3 ! r3 = aacc | |
988 | tst r3,r3 ! msws zero ? | |
30b0bb86 | 989 | bf hiset /* an upper half is nonzero: the cross products are needed */ |
b090827b | 990 | rts ! yes - then we have the answer |
a85a2db0 | 991 | sts macl,r0 /* delay slot of rts: r0 = bb*dd */ |
992 | ||
993 | hiset: sts macl,r0 ! r0 = bb*dd | |
fb062b27 | 994 | mulu.w r2,r5 ! brewing macl = aa*dd |
a85a2db0 | 995 | sts macl,r1 /* r1 = aa*dd */ |
fb062b27 | 996 | mulu.w r3,r4 ! brewing macl = cc*bb |
30b0bb86 | 997 | sts macl,r2 /* r2 = cc*bb */ |
a85a2db0 | 998 | add r1,r2 /* r2 = aa*dd + cc*bb */ |
999 | shll16 r2 /* scale the summed cross products by 65536 */ | |
1000 | rts | |
1001 | add r2,r0 /* delay slot of rts: r0 = bb*dd + 65536*(aa*dd + cc*bb) */ | |
30b0bb86 | 1002 | |
59312820 | 1003 | ENDFUNC(GLOBAL(mulsi3)) |
30b0bb86 | 1004 | #endif |
87e19636 | 1005 | #endif /* ! __SH5__ */ |
fe9c9e23 | 1006 | |
1007 | /*------------------------------------------------------------------------------ | |
1008 | 32 bit signed integer division that uses FPU double precision division. */ | |
1009 | ||
1b61190c | 1010 | #ifdef L_sdivsi3_i4 |
a85a2db0 | 1011 | .title "SH DIVIDE" |
fe9c9e23 | 1012 | |
bb057878 | 1013 | #if defined (__SH4__) || defined (__SH2A__) |
fe9c9e23 | 1014 | /* This variant is used when FPSCR.PR = 1 (double precision) is the default |
1015 | setting. | |
1016 | Args in r4 and r5, result in fpul, clobber dr0, dr2. */ | |
1b61190c | 1017 | |
16f1dae0 | 1018 | .global GLOBAL(sdivsi3_i4) |
59312820 | 1019 | HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) |
16f1dae0 | 1020 | GLOBAL(sdivsi3_i4): /* fpul = truncated ((double) r4 / (double) r5) */ |
1b61190c | 1021 | lds r4,fpul |
1022 | float fpul,dr0 /* dr0 = (double) dividend */ | |
1023 | lds r5,fpul | |
1024 | float fpul,dr2 /* dr2 = (double) divisor */ | |
1025 | fdiv dr2,dr0 /* dr0 = dr0 / dr2 */ | |
1026 | rts | |
1027 | ftrc dr0,fpul /* delay slot of rts: truncate the quotient to an integer in fpul */ | |
1028 | ||
805e22b2 | 1029 | ENDFUNC(GLOBAL(sdivsi3_i4)) |
fe9c9e23 | 1030 | |
bb057878 | 1031 | #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) |
fe9c9e23 | 1032 | /* This variant is used when FPSCR.PR = 0 (single precision) is the default |
1033 | setting. | |
1034 | Args in r4 and r5, result in fpul, clobber r2, dr0, dr2. | |
1035 | For this to work, we must temporarily switch the FPU to double precision, | |
1036 | but we better do not touch FPSCR.FR. See PR 6526. */ | |
1b61190c | 1037 | |
87e19636 | 1038 | #if ! __SH5__ || __SH5__ == 32 |
1039 | #if __SH5__ | |
1040 | .mode SHcompact | |
1041 | #endif | |
16f1dae0 | 1042 | .global GLOBAL(sdivsi3_i4) |
59312820 | 1043 | HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) |
16f1dae0 | 1044 | GLOBAL(sdivsi3_i4): /* Same computation as the double-precision-default variant, bracketed by a temporary switch of FPSCR.PR. */ |
fe9c9e23 | 1045 | |
1046 | #ifndef __SH4A__ | |
1047 | mov.l r3,@-r15 /* r3 is needed as a temporary: save it on the stack */ | |
1048 | sts fpscr,r2 /* r2 = caller's FPSCR, kept for the restore below */ | |
1049 | mov #8,r3 | |
1050 | swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit) | |
1051 | or r2,r3 /* r3 = caller's FPSCR with the PR bit set */ | |
1052 | lds r3,fpscr // Set FPSCR.PR = 1. | |
1053 | lds r4,fpul | |
1054 | float fpul,dr0 /* dr0 = (double) dividend */ | |
1055 | lds r5,fpul | |
1056 | float fpul,dr2 /* dr2 = (double) divisor */ | |
1057 | fdiv dr2,dr0 | |
1058 | ftrc dr0,fpul /* truncated quotient -> fpul, done before FPSCR is restored */ | |
1059 | lds r2,fpscr /* restore the caller's FPSCR */ | |
1b61190c | 1060 | rts |
fe9c9e23 | 1061 | mov.l @r15+,r3 /* delay slot of rts: restore r3 */ |
1062 | #else | |
1063 | /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */ | |
1064 | fpchg /* flip PR: enter double-precision mode */ | |
1065 | lds r4,fpul | |
1066 | float fpul,dr0 | |
1067 | lds r5,fpul | |
1068 | float fpul,dr2 | |
1069 | fdiv dr2,dr0 | |
1070 | ftrc dr0,fpul | |
1071 | rts | |
1072 | fpchg /* delay slot of rts: flip PR back to the caller's setting */ | |
1073 | ||
1074 | #endif /* __SH4A__ */ | |
1b61190c | 1075 | |
805e22b2 | 1076 | ENDFUNC(GLOBAL(sdivsi3_i4)) |
87e19636 | 1077 | #endif /* ! __SH5__ || __SH5__ == 32 */ |
bb057878 | 1078 | #endif /* __SH4__ || __SH2A__, or one of the single-precision-default targets */ |
fe9c9e23 | 1079 | #endif /* L_sdivsi3_i4 */ |
1b61190c | 1080 | |
fe9c9e23 | 1081 | //------------------------------------------------------------------------------ |
1b61190c | 1082 | #ifdef L_sdivsi3 |
1083 | /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with | |
87ed74ef | 1084 | sh2e/sh3e code. */ |
a85a2db0 | 1085 | !! |
1086 | !! Steve Chamberlain | |
1087 | !! sac@cygnus.com | |
1088 | !! | |
1089 | !! | |
1090 | ||
805e22b2 | 1091 | !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit |
a85a2db0 | 1092 | |
16f1dae0 | 1093 | .global GLOBAL(sdivsi3) |
87e19636 | 1094 | #if __SHMEDIA__ |
1095 | #if __SH5__ == 32 | |
1096 | .section .text..SHmedia32,"ax" | |
1097 | #else | |
1098 | .text | |
1099 | #endif | |
1100 | .align 2 | |
0c63e844 | 1101 | #if 0 |
87e19636 | 1102 | /* The assembly code that follows is a hand-optimized version of the C |
1103 | code that follows. Note that the registers that are modified are | |
1104 | exactly those listed as clobbered in the patterns divsi3_i1 and | |
1105 | divsi3_i1_media. | |
1106 | ||
1107 | int __sdivsi3 (i, j) | |
1108 | int i, j; | |
1109 | { | |
1110 | register unsigned long long r18 asm ("r18"); | |
1111 | register unsigned long long r19 asm ("r19"); | |
1112 | register unsigned long long r0 asm ("r0") = 0; | |
1113 | register unsigned long long r1 asm ("r1") = 1; | |
1114 | register int r2 asm ("r2") = i >> 31; | |
1115 | register int r3 asm ("r3") = j >> 31; | |
1116 | ||
1117 | r2 = r2 ? r2 : r1; | |
1118 | r3 = r3 ? r3 : r1; | |
1119 | r18 = i * r2; | |
1120 | r19 = j * r3; | |
1121 | r2 *= r3; | |
1122 | ||
1123 | r19 <<= 31; | |
1124 | r1 <<= 31; | |
1125 | do | |
1126 | if (r18 >= r19) | |
1127 | r0 |= r1, r18 -= r19; | |
1128 | while (r19 >>= 1, r1 >>= 1); | |
1129 | ||
1130 | return r2 * (int)r0; | |
1131 | } | |
1132 | */ | |
1133 | GLOBAL(sdivsi3): | |
1134 | pt/l LOCAL(sdivsi3_dontadd), tr2 | |
1135 | pt/l LOCAL(sdivsi3_loop), tr1 | |
1136 | ptabs/l r18, tr0 | |
1137 | movi 0, r0 | |
1138 | movi 1, r1 | |
1139 | shari.l r4, 31, r2 | |
1140 | shari.l r5, 31, r3 | |
1141 | cmveq r2, r1, r2 | |
1142 | cmveq r3, r1, r3 | |
1143 | muls.l r4, r2, r18 | |
1144 | muls.l r5, r3, r19 | |
1145 | muls.l r2, r3, r2 | |
1146 | shlli r19, 31, r19 | |
1147 | shlli r1, 31, r1 | |
1148 | LOCAL(sdivsi3_loop): | |
1149 | bgtu r19, r18, tr2 | |
1150 | or r0, r1, r0 | |
1151 | sub r18, r19, r18 | |
1152 | LOCAL(sdivsi3_dontadd): | |
1153 | shlri r1, 1, r1 | |
1154 | shlri r19, 1, r19 | |
1155 | bnei r1, 0, tr1 | |
1156 | muls.l r0, r2, r0 | |
1157 | add.l r0, r63, r0 | |
1158 | blink tr0, r63 | |
59312820 | 1159 | #elif 0 /* ! 0 */ |
0c63e844 | 1160 | // inputs: r4,r5 |
1161 | // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 | |
1162 | // result in r0 | |
1163 | GLOBAL(sdivsi3): | |
1164 | // can create absolute value without extra latency, | |
1165 | // but dependent on proper sign extension of inputs: | |
1166 | // shari.l r5,31,r2 | |
1167 | // xor r5,r2,r20 | |
1168 | // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. | |
1169 | shari.l r5,31,r2 | |
1170 | ori r2,1,r2 | |
1171 | muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. | |
1172 | movi 0xffffffffffffbb0c,r19 // shift count equiv 76 | |
1173 | shari.l r4,31,r3 | |
1174 | nsb r20,r0 | |
1175 | shlld r20,r0,r25 | |
1176 | shlri r25,48,r25 | |
1177 | sub r19,r25,r1 | |
1178 | mmulfx.w r1,r1,r2 | |
1179 | mshflo.w r1,r63,r1 | |
1180 | // If r4 was to be used in-place instead of r21, could use this sequence | |
1181 | // to compute absolute: | |
1182 | // sub r63,r4,r19 // compute absolute value of r4 | |
1183 | // shlri r4,32,r3 // into lower 32 bit of r4, keeping | |
1184 | // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. | |
1185 | ori r3,1,r3 | |
1186 | mmulfx.w r25,r2,r2 | |
1187 | sub r19,r0,r0 | |
1188 | muls.l r4,r3,r21 | |
1189 | msub.w r1,r2,r2 | |
1190 | addi r2,-2,r1 | |
1191 | mulu.l r21,r1,r19 | |
1192 | mmulfx.w r2,r2,r2 | |
1193 | shlli r1,15,r1 | |
1194 | shlrd r19,r0,r19 | |
1195 | mulu.l r19,r20,r3 | |
1196 | mmacnfx.wl r25,r2,r1 | |
1197 | ptabs r18,tr0 | |
1198 | sub r21,r3,r25 | |
1199 | ||
1200 | mulu.l r25,r1,r2 | |
1201 | addi r0,14,r0 | |
1202 | xor r4,r5,r18 | |
1203 | shlrd r2,r0,r2 | |
1204 | mulu.l r2,r20,r3 | |
1205 | add r19,r2,r19 | |
1206 | shari.l r18,31,r18 | |
1207 | sub r25,r3,r25 | |
1208 | ||
1209 | mulu.l r25,r1,r2 | |
1210 | sub r25,r20,r25 | |
1211 | add r19,r18,r19 | |
1212 | shlrd r2,r0,r2 | |
1213 | mulu.l r2,r20,r3 | |
1214 | addi r25,1,r25 | |
1215 | add r19,r2,r19 | |
1216 | ||
1217 | cmpgt r25,r3,r25 | |
1218 | add.l r19,r25,r0 | |
1219 | xor r0,r18,r0 | |
1220 | blink tr0,r63 | |
59312820 | 1221 | #else /* ! 0 && ! 0 */ |
1222 | ||
1223 | // inputs: r4,r5 | |
1224 | // clobbered: r1,r18,r19,r20,r21,r25,tr0 | |
1225 | // result in r0 | |
1226 | HIDDEN_FUNC(GLOBAL(sdivsi3_2)) /* SHmedia signed 32-bit divide using a reciprocal lookup table: dividend r4, divisor r5, quotient r0. */ | |
1227 | #ifndef __pic__ | |
1228 | FUNC(GLOBAL(sdivsi3)) | |
1229 | GLOBAL(sdivsi3): /* this is the shcompact entry point */ | |
1230 | // The special SHmedia entry point sdivsi3_1 prevents accidental linking | |
1231 | // with the SHcompact implementation, which clobbers tr1 / tr2. | |
1232 | .global GLOBAL(sdivsi3_1) | |
1233 | GLOBAL(sdivsi3_1): | |
1234 | .global GLOBAL(div_table_internal) | |
1235 | movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 /* r20 = address of div_table_internal, built 16 bits at a time */ | |
1236 | shori GLOBAL(div_table_internal) & 65535, r20 | |
1237 | #endif | |
1238 | .global GLOBAL(sdivsi3_2) | |
1239 | // div_table in r20 | |
1240 | // clobbered: r1,r18,r19,r21,r25,tr0 | |
1241 | GLOBAL(sdivsi3_2): /* Entry for callers that already hold the table address in r20 (the only entry assembled when __pic__ is defined). */ | |
1242 | nsb r5, r1 /* r1 = normalisation shift derived from the divisor's sign bits */ | |
1243 | shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 | |
1244 | shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) | |
1245 | ldx.ub r20, r21, r19 // u0.8 | |
1246 | shari r25, 32, r25 // normalize to s2.30 | |
1247 | shlli r21, 1, r21 /* double the index for the 16-bit table lookup below */ | |
1248 | muls.l r25, r19, r19 // s2.38 | |
1249 | ldx.w r20, r21, r21 // s2.14 | |
1250 | ptabs r18, tr0 /* tr0 = return address taken from r18 */ | |
1251 | shari r19, 24, r19 // truncate to s2.14 | |
1252 | sub r21, r19, r19 // some 11 bit inverse in s1.14 | |
1253 | muls.l r19, r19, r21 // u0.28 | |
1254 | sub r63, r1, r1 /* r1 = -r1 ... */ | |
1255 | addi r1, 92, r1 /* ... + 92: shift count consumed by the final shard */ | |
1256 | muls.l r25, r21, r18 // s2.58 | |
1257 | shlli r19, 45, r19 // multiply by two and convert to s2.58 | |
1258 | /* bubble */ | |
1259 | sub r19, r18, r18 | |
1260 | shari r18, 28, r18 // some 22 bit inverse in s1.30 | |
1261 | muls.l r18, r25, r0 // s2.60 | |
1262 | muls.l r18, r4, r25 // s32.30 | |
1263 | /* bubble */ | |
1264 | shari r0, 16, r19 // s-16.44 | |
1265 | muls.l r19, r18, r19 // s-16.74 | |
1266 | shari r25, 63, r0 /* r0 = 0 or -1 according to the sign of r25 */ | |
1267 | shari r4, 14, r18 // s19.-14 | |
1268 | shari r19, 30, r19 // s-16.44 | |
1269 | muls.l r19, r18, r19 // s15.30 | |
1270 | xor r21, r0, r21 // You could also use the constant 1 << 27. | |
1271 | add r21, r25, r21 | |
1272 | sub r21, r19, r21 | |
1273 | shard r21, r1, r21 /* scale back by the shift count prepared in r1 */ | |
1274 | sub r21, r0, r0 /* final sign-dependent adjustment; quotient left in r0 */ | |
1275 | blink tr0, r63 /* return */ | |
1276 | #ifndef __pic__ | |
1277 | ENDFUNC(GLOBAL(sdivsi3)) | |
1278 | #endif | |
1279 | ENDFUNC(GLOBAL(sdivsi3_2)) | |
0c63e844 | 1280 | #endif |
273fffd6 | 1281 | #elif __SHMEDIA__ |
0c63e844 | 1282 | /* m5compact-nofpu. NOTE(review): the conditional that opens this chain tests the same __SHMEDIA__ expression, so this branch cannot be selected as written - confirm whether a "defined" test was intended. */ |
1283 | // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 | |
1284 | .mode SHmedia | |
1285 | .section .text..SHmedia32,"ax" | |
1286 | .align 2 | |
59312820 | 1287 | FUNC(GLOBAL(sdivsi3)) |
0c63e844 | 1288 | GLOBAL(sdivsi3): /* Signed 32-bit divide by a 32-step shift-and-subtract loop: r4 / r5 -> r0. */ |
1289 | pt/l LOCAL(sdivsi3_dontsub), tr0 | |
1290 | pt/l LOCAL(sdivsi3_loop), tr1 | |
1291 | ptabs/l r18,tr2 /* tr2 = return address taken from r18 */ | |
1292 | shari.l r4,31,r18 /* r18 = 0 or -1: sign of the dividend */ | |
1293 | shari.l r5,31,r19 /* r19 = 0 or -1: sign of the divisor */ | |
1294 | xor r4,r18,r20 | |
1295 | xor r5,r19,r21 | |
1296 | sub.l r20,r18,r20 /* r20 = absolute value of the dividend */ | |
1297 | sub.l r21,r19,r21 /* r21 = absolute value of the divisor */ | |
1298 | xor r18,r19,r19 /* r19 = 0 or -1: sign of the quotient */ | |
1299 | shlli r21,32,r25 /* divisor moved to the upper 32 bits; the low half of r25 doubles as the loop counter */ | |
1300 | addi r25,-1,r21 /* r21 = (divisor << 32) - 1, so subtracting it also adds 1 to the low half */ | |
1301 | addz.l r20,r63,r20 /* zero-extend the dividend to 64 bits */ | |
1302 | LOCAL(sdivsi3_loop): | |
1303 | shlli r20,1,r20 | |
1304 | bgeu/u r21,r20,tr0 /* skip the subtraction when r21 >= r20 (unsigned) */ | |
1305 | sub r20,r21,r20 | |
1306 | LOCAL(sdivsi3_dontsub): | |
1307 | addi.l r25,-1,r25 /* 32-bit counter: -1, -2, ... */ | |
1308 | bnei r25,-32,tr1 /* loop until the counter reaches -32, i.e. 32 iterations */ | |
1309 | xor r20,r19,r20 | |
1310 | sub.l r20,r19,r0 /* apply the quotient sign; r0 = low 32 bits, sign-extended */ | |
1311 | blink tr2,r63 /* return */ | |
59312820 | 1312 | ENDFUNC(GLOBAL(sdivsi3)) |
0c63e844 | 1313 | #else /* ! __SHMEDIA__ */ |
59312820 | 1314 | FUNC(GLOBAL(sdivsi3)) |
16f1dae0 | 1315 | GLOBAL(sdivsi3): /* Signed 32-bit divide with the div0s/div1 step instructions: r4 / r5 -> r0; a zero divisor returns 0. */ |
a85a2db0 | 1316 | mov r4,r1 /* r1 = dividend; the quotient bits are rotated into it */ |
1317 | mov r5,r0 /* r0 = divisor */ | |
30b0bb86 | 1318 | |
a85a2db0 | 1319 | tst r0,r0 |
1320 | bt div0 /* divisor == 0: take the early exit */ | |
1321 | mov #0,r2 | |
1322 | div0s r2,r1 /* with r2 = 0 this leaves T = sign bit of the dividend */ | |
1323 | subc r3,r3 /* r3 = 0 or -1: sign extension of the dividend (upper word) */ | |
1324 | subc r2,r1 /* subtract 1 from a negative dividend */ | |
1325 | div0s r0,r3 /* initialise the divide-step flags from divisor and dividend signs */ | |
1326 | rotcl r1 /* 32 rotcl/div1 pairs follow, one per quotient bit */ | |
1327 | div1 r0,r3 | |
1328 | rotcl r1 | |
1329 | div1 r0,r3 | |
1330 | rotcl r1 | |
1331 | div1 r0,r3 | |
1332 | rotcl r1 | |
1333 | div1 r0,r3 | |
1334 | rotcl r1 | |
1335 | div1 r0,r3 | |
1336 | rotcl r1 | |
1337 | div1 r0,r3 | |
1338 | rotcl r1 | |
1339 | div1 r0,r3 | |
1340 | rotcl r1 | |
1341 | div1 r0,r3 | |
1342 | rotcl r1 | |
1343 | div1 r0,r3 | |
1344 | rotcl r1 | |
1345 | div1 r0,r3 | |
1346 | rotcl r1 | |
1347 | div1 r0,r3 | |
1348 | rotcl r1 | |
1349 | div1 r0,r3 | |
1350 | rotcl r1 | |
1351 | div1 r0,r3 | |
1352 | rotcl r1 | |
1353 | div1 r0,r3 | |
1354 | rotcl r1 | |
1355 | div1 r0,r3 | |
1356 | rotcl r1 | |
1357 | div1 r0,r3 | |
1358 | rotcl r1 | |
1359 | div1 r0,r3 | |
1360 | rotcl r1 | |
1361 | div1 r0,r3 | |
1362 | rotcl r1 | |
1363 | div1 r0,r3 | |
1364 | rotcl r1 | |
1365 | div1 r0,r3 | |
1366 | rotcl r1 | |
1367 | div1 r0,r3 | |
1368 | rotcl r1 | |
1369 | div1 r0,r3 | |
1370 | rotcl r1 | |
1371 | div1 r0,r3 | |
1372 | rotcl r1 | |
1373 | div1 r0,r3 | |
1374 | rotcl r1 | |
1375 | div1 r0,r3 | |
1376 | rotcl r1 | |
1377 | div1 r0,r3 | |
1378 | rotcl r1 | |
1379 | div1 r0,r3 | |
1380 | rotcl r1 | |
1381 | div1 r0,r3 | |
1382 | rotcl r1 | |
1383 | div1 r0,r3 | |
1384 | rotcl r1 | |
1385 | div1 r0,r3 | |
1386 | rotcl r1 | |
1387 | div1 r0,r3 | |
1388 | rotcl r1 | |
1389 | div1 r0,r3 | |
1390 | rotcl r1 /* shift in the last quotient bit */ | |
1391 | addc r2,r1 /* r1 += T (r2 is 0): final quotient correction */ | |
30b0bb86 | 1392 | rts |
a85a2db0 | 1393 | mov r1,r0 /* delay slot of rts: return the quotient in r0 */ |
1394 | ||
30b0bb86 | 1395 | |
a85a2db0 | 1396 | div0: rts /* division by zero: no trap is raised */ |
1397 | mov #0,r0 /* delay slot of rts: result is 0 */ | |
1398 | ||
805e22b2 | 1399 | ENDFUNC(GLOBAL(sdivsi3)) |
fe9c9e23 | 1400 | #endif /* ! __SHMEDIA__ */ |
1401 | #endif /* L_sdivsi3 */ | |
1402 | ||
1403 | /*------------------------------------------------------------------------------ | |
1404 | 32 bit unsigned integer division that uses FPU double precision division. */ | |
a85a2db0 | 1405 | |
fe9c9e23 | 1406 | #ifdef L_udivsi3_i4 |
a85a2db0 | 1407 | .title "SH DIVIDE" |
fe9c9e23 | 1408 | |
bb057878 | 1409 | #if defined (__SH4__) || defined (__SH2A__) |
fe9c9e23 | 1410 | /* This variant is used when FPSCR.PR = 1 (double precision) is the default |
1411 | setting. | |
1412 | Args in r4 and r5, result in fpul, | |
1413 | clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */ | |
1b61190c | 1414 | |
16f1dae0 | 1415 | .global GLOBAL(udivsi3_i4) |
59312820 | 1416 | HIDDEN_FUNC(GLOBAL(udivsi3_i4)) |
16f1dae0 | 1417 | GLOBAL(udivsi3_i4): /* Unsigned r4 / r5 via a double-precision divide. The int-to-double conversion is signed, so both operands are biased by 2^31 first and un-biased afterwards. */ |
fe9c9e23 | 1418 | mov #1,r1 |
1419 | cmp/hi r1,r5 /* T = (divisor > 1), unsigned compare */ | |
1420 | bf/s trivial /* divisor is 0 or 1: return the dividend unchanged */ | |
1421 | rotr r1 /* delay slot: r1 = 1 << 31 */ | |
1422 | xor r1,r4 /* flip the top bit: as a signed value r4 is now dividend - 2^31 */ | |
1423 | lds r4,fpul | |
1424 | mova L1,r0 /* r0 = address of the 2^31 constant at L1 */ | |
1b61190c | 1425 | #ifdef FMOVD_WORKS |
fe9c9e23 | 1426 | fmov.d @r0+,dr4 |
1b61190c | 1427 | #else |
fe9c9e23 | 1428 | fmov.s @r0+,DR40 /* DR40/DR41 are defined outside this section; presumably the two single-precision halves of dr4 - confirm */ |
1429 | fmov.s @r0,DR41 | |
1b61190c | 1430 | #endif |
fe9c9e23 | 1431 | float fpul,dr0 |
1432 | xor r1,r5 /* same 2^31 bias for the divisor */ | |
1433 | lds r5,fpul | |
1434 | float fpul,dr2 | |
1435 | fadd dr4,dr0 /* add 2^31 back: dr0 = dividend as an unsigned value */ | |
1436 | fadd dr4,dr2 /* dr2 = divisor as an unsigned value */ | |
1437 | fdiv dr2,dr0 | |
1b61190c | 1438 | rts |
fe9c9e23 | 1439 | ftrc dr0,fpul /* delay slot of rts: truncated quotient -> fpul */ |
1b61190c | 1440 | |
1441 | trivial: | |
1442 | rts | |
fe9c9e23 | 1443 | lds r4,fpul /* delay slot of rts: result = dividend (r4 has not been modified on this path) */ |
1b61190c | 1444 | |
1445 | .align 2 | |
6a807829 | 1446 | #ifdef FMOVD_WORKS |
fe9c9e23 | 1447 | .align 3 // Make the double below 8 byte aligned. |
6a807829 | 1448 | #endif |
1b61190c | 1449 | L1: |
1450 | .double 2147483648 /* 2^31, the bias constant loaded into dr4 */ | |
1451 | ||
805e22b2 | 1452 | ENDFUNC(GLOBAL(udivsi3_i4)) |
fe9c9e23 | 1453 | |
bb057878 | 1454 | #elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__) |
de0cf984 | 1455 | #if ! __SH5__ || __SH5__ == 32 |
1456 | !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 | |
1457 | .mode SHmedia | |
1458 | .global GLOBAL(udivsi3_i4) | |
59312820 | 1459 | HIDDEN_FUNC(GLOBAL(udivsi3_i4)) |
de0cf984 | 1460 | GLOBAL(udivsi3_i4): /* SHmedia body: zero-extend both operands to 64 bits so the 64-bit signed conversion sees them as non-negative. */ |
1461 | addz.l r4,r63,r20 /* r20 = zero-extended dividend */ | |
1462 | addz.l r5,r63,r21 /* r21 = zero-extended divisor */ | |
1463 | fmov.qd r20,dr0 | |
1464 | fmov.qd r21,dr32 | |
1465 | ptabs r18,tr0 /* tr0 = return address taken from r18 */ | |
1466 | float.qd dr0,dr0 /* 64-bit integer -> double */ | |
1467 | float.qd dr32,dr32 | |
1468 | fdiv.d dr0,dr32,dr0 /* dr0 = dividend / divisor */ | |
1469 | ftrc.dq dr0,dr32 /* truncate to a 64-bit integer held in dr32 */ | |
1470 | fmov.s fr33,fr32 /* NOTE(review): presumably moves the low 32 result bits into the register the caller reads as fpul - confirm */ | |
1471 | blink tr0,r63 /* return */ | |
805e22b2 | 1472 | |
1473 | ENDFUNC(GLOBAL(udivsi3_i4)) | |
de0cf984 | 1474 | #endif /* ! __SH5__ || __SH5__ == 32 */ |
fe9c9e23 | 1475 | |
bb057878 | 1476 | #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) |
fe9c9e23 | 1477 | /* This variant is used when FPSCR.PR = 0 (single precision) is the default |
1478 | setting. | |
1479 | Args in r4 and r5, result in fpul, | |
1480 | clobber r0, r1, r4, r5, dr0, dr2, dr4. | |
1481 | For this to work, we must temporarily switch the FPU to double precision, | |
1482 | but we better do not touch FPSCR.FR. See PR 6526. */ | |
1b61190c | 1483 | |
16f1dae0 | 1484 | .global GLOBAL(udivsi3_i4) |
59312820 | 1485 | HIDDEN_FUNC(GLOBAL(udivsi3_i4)) |
16f1dae0 | 1486 | GLOBAL(udivsi3_i4): /* Unsigned r4 / r5 via a double-precision divide on 2^31-biased operands, with FPSCR switched temporarily. */ |
fe9c9e23 | 1487 | |
1488 | #ifndef __SH4A__ | |
1489 | mov #1,r1 | |
1490 | cmp/hi r1,r5 /* T = (divisor > 1), unsigned compare */ | |
1491 | bf/s trivial /* divisor is 0 or 1: return the dividend; taken before anything is pushed */ | |
1492 | rotr r1 // r1 = 1 << 31 | |
1493 | sts.l fpscr,@-r15 /* push the caller's FPSCR */ | |
1494 | xor r1,r4 /* bias the dividend by 2^31 */ | |
1495 | mov.l @(0,r15),r0 /* r0 = the FPSCR value just pushed */ | |
1496 | xor r1,r5 /* bias the divisor by 2^31 */ | |
1497 | mov.l L2,r1 /* r1 = mode bits to set (see L2) */ | |
1498 | lds r4,fpul | |
1499 | or r0,r1 /* r1 = caller's FPSCR with those bits set */ | |
1500 | mova L1,r0 /* r0 = address of the 2^31 constant */ | |
1501 | lds r1,fpscr | |
1b61190c | 1502 | #ifdef FMOVD_WORKS |
fe9c9e23 | 1503 | fmov.d @r0+,dr4 |
1b61190c | 1504 | #else |
fe9c9e23 | 1505 | fmov.s @r0+,DR40 |
1506 | fmov.s @r0,DR41 | |
1b61190c | 1507 | #endif |
fe9c9e23 | 1508 | float fpul,dr0 |
1509 | lds r5,fpul | |
1510 | float fpul,dr2 | |
1511 | fadd dr4,dr0 /* remove the bias: dr0 = dividend as an unsigned value */ | |
1512 | fadd dr4,dr2 /* dr2 = divisor as an unsigned value */ | |
1513 | fdiv dr2,dr0 | |
1514 | ftrc dr0,fpul /* truncated quotient -> fpul, done before FPSCR is restored */ | |
1b61190c | 1515 | rts |
fe9c9e23 | 1516 | lds.l @r15+,fpscr /* delay slot of rts: pop the caller's FPSCR */ |
1b61190c | 1517 | |
6a807829 | 1518 | #ifdef FMOVD_WORKS |
fe9c9e23 | 1519 | .align 3 // Make the double below 8 byte aligned. |
6a807829 | 1520 | #endif |
1b61190c | 1521 | trivial: |
1522 | rts | |
fe9c9e23 | 1523 | lds r4,fpul /* delay slot of rts: result = dividend */ |
1b61190c | 1524 | |
1525 | .align 2 | |
fe9c9e23 | 1526 | L2: |
1527 | #ifdef FMOVD_WORKS | |
1528 | .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1 | |
1b61190c | 1529 | #else |
fe9c9e23 | 1530 | .long 0x80000 // FPSCR.PR = 1 |
1b61190c | 1531 | #endif |
fe9c9e23 | 1532 | L1: |
1b61190c | 1533 | .double 2147483648 |
1534 | ||
fe9c9e23 | 1535 | #else |
1536 | /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. | |
1537 | Although on SH4A fmovd usually works, it would require either additional | |
1538 | two fschg instructions or an FPSCR push + pop. It's not worth the effort | |
1539 | for loading only one double constant. */ | |
1540 | mov #1,r1 | |
1541 | cmp/hi r1,r5 /* T = (divisor > 1), unsigned compare */ | |
1542 | bf/s trivial /* divisor is 0 or 1: return the dividend; taken before PR is flipped */ | |
1543 | rotr r1 // r1 = 1 << 31 | |
1544 | fpchg /* flip PR: enter double-precision mode */ | |
1545 | mova L1,r0 | |
1546 | xor r1,r4 /* bias the dividend by 2^31 */ | |
1547 | fmov.s @r0+,DR40 | |
1548 | lds r4,fpul | |
1549 | fmov.s @r0,DR41 | |
1550 | xor r1,r5 /* bias the divisor by 2^31 */ | |
1551 | float fpul,dr0 | |
1552 | lds r5,fpul | |
1553 | float fpul,dr2 | |
1554 | fadd dr4,dr0 | |
1555 | fadd dr4,dr2 | |
1556 | fdiv dr2,dr0 | |
1557 | ftrc dr0,fpul | |
1558 | rts | |
1559 | fpchg /* delay slot of rts: flip PR back */ | |
1560 | ||
1561 | trivial: | |
1562 | rts | |
1563 | lds r4,fpul /* delay slot of rts: result = dividend */ | |
1564 | ||
1565 | .align 2 | |
1566 | L1: | |
1567 | .double 2147483648 | |
1568 | ||
1569 | #endif /* __SH4A__ */ | |
1570 | ||
1571 | ||
805e22b2 | 1572 | ENDFUNC(GLOBAL(udivsi3_i4)) |
1b61190c | 1573 | #endif /* end of the udivsi3_i4 target variants */ |
fe9c9e23 | 1574 | #endif /* L_udivsi3_i4 */ |
1b61190c | 1575 | |
1576 | #ifdef L_udivsi3 | |
1577 | /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with | |
87ed74ef | 1578 | sh2e/sh3e code. */ |
a85a2db0 | 1579 | |
570d13b9 | 1580 | !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit |
16f1dae0 | 1581 | .global GLOBAL(udivsi3) |
59312820 | 1582 | HIDDEN_FUNC(GLOBAL(udivsi3)) |
a85a2db0 | 1583 | |
87e19636 | 1584 | #if __SHMEDIA__ |
1585 | #if __SH5__ == 32 | |
1586 | .section .text..SHmedia32,"ax" | |
1587 | #else | |
1588 | .text | |
1589 | #endif | |
1590 | .align 2 | |
0c63e844 | 1591 | #if 0 |
87e19636 | 1592 | /* The assembly code that follows is a hand-optimized version of the C |
1593 | code that follows. Note that the registers that are modified are | |
1594 | exactly those listed as clobbered in the patterns udivsi3_i1 and | |
1595 | udivsi3_i1_media. | |
1596 | ||
1597 | unsigned | |
1598 | __udivsi3 (i, j) | |
1599 | unsigned i, j; | |
1600 | { | |
1601 | register unsigned long long r0 asm ("r0") = 0; | |
1602 | register unsigned long long r18 asm ("r18") = 1; | |
1603 | register unsigned long long r4 asm ("r4") = i; | |
1604 | register unsigned long long r19 asm ("r19") = j; | |
1605 | ||
1606 | r19 <<= 31; | |
1607 | r18 <<= 31; | |
1608 | do | |
1609 | if (r4 >= r19) | |
1610 | r0 |= r18, r4 -= r19; | |
1611 | while (r19 >>= 1, r18 >>= 1); | |
1612 | ||
1613 | return r0; | |
1614 | } | |
1615 | */ | |
1616 | GLOBAL(udivsi3): | |
1617 | pt/l LOCAL(udivsi3_dontadd), tr2 | |
1618 | pt/l LOCAL(udivsi3_loop), tr1 | |
1619 | ptabs/l r18, tr0 | |
1620 | movi 0, r0 | |
1621 | movi 1, r18 | |
1622 | addz.l r5, r63, r19 | |
1623 | addz.l r4, r63, r4 | |
1624 | shlli r19, 31, r19 | |
1625 | shlli r18, 31, r18 | |
1626 | LOCAL(udivsi3_loop): | |
1627 | bgtu r19, r4, tr2 | |
1628 | or r0, r18, r0 | |
1629 | sub r4, r19, r4 | |
1630 | LOCAL(udivsi3_dontadd): | |
1631 | shlri r18, 1, r18 | |
1632 | shlri r19, 1, r19 | |
1633 | bnei r18, 0, tr1 | |
1634 | blink tr0, r63 | |
1635 | #else | |
16f1dae0 | 1636 | GLOBAL(udivsi3): /* SHmedia unsigned 32-bit divide: a reciprocal estimate of the divisor followed by three multiply-and-correct rounds. */ |
0c63e844 | 1637 | // inputs: r4,r5 |
1638 | // clobbered: r18,r19,r20,r21,r22,r25,tr0 | |
1639 | // result in r0. | |
1640 | addz.l r5,r63,r22 /* r22 = zero-extended divisor */ | |
1641 | nsb r22,r0 /* r0 = normalisation shift for the divisor */ | |
1642 | shlld r22,r0,r25 | |
1643 | shlri r25,48,r25 /* keep the top 16 bits of the normalised divisor */ | |
1644 | movi 0xffffffffffffbb0c,r20 // shift count equiv 76 | |
1645 | sub r20,r25,r21 | |
1646 | mmulfx.w r21,r21,r19 | |
1647 | mshflo.w r21,r63,r21 | |
1648 | ptabs r18,tr0 /* tr0 = return address taken from r18 */ | |
1649 | mmulfx.w r25,r19,r19 | |
1650 | sub r20,r0,r0 | |
1651 | /* bubble */ | |
1652 | msub.w r21,r19,r19 | |
1653 | addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 | |
1654 | before the msub.w, but we need a different value for | |
1655 | r19 to keep errors under control. */ | |
1656 | mulu.l r4,r21,r18 /* first quotient estimate: dividend times reciprocal */ | |
1657 | mmulfx.w r19,r19,r19 | |
1658 | shlli r21,15,r21 | |
1659 | shlrd r18,r0,r18 | |
1660 | mulu.l r18,r22,r20 | |
1661 | mmacnfx.wl r25,r19,r21 | |
1662 | /* bubble */ | |
1663 | sub r4,r20,r25 /* r25 = remainder left by the first estimate */ | |
1664 | ||
1665 | mulu.l r25,r21,r19 /* second round, applied to the remainder */ | |
1666 | addi r0,14,r0 | |
1667 | /* bubble */ | |
1668 | shlrd r19,r0,r19 | |
1669 | mulu.l r19,r22,r20 | |
1670 | add r18,r19,r18 /* accumulate the quotient */ | |
1671 | /* bubble */ | |
1672 | sub.l r25,r20,r25 | |
1673 | ||
1674 | mulu.l r25,r21,r19 /* third round */ | |
1675 | addz.l r25,r63,r25 | |
1676 | sub r25,r22,r25 | |
1677 | shlrd r19,r0,r19 | |
1678 | mulu.l r19,r22,r20 | |
1679 | addi r25,1,r25 | |
1680 | add r18,r19,r18 | |
1681 | ||
1682 | cmpgt r25,r20,r25 /* r25 = 1 when one more divisor still fits, else 0 */ | |
1683 | add.l r18,r25,r0 /* r0 = quotient plus that final correction */ | |
1684 | blink tr0,r63 /* return */ | |
1685 | #endif | |
273fffd6 | 1686 | #elif __SHMEDIA__ |
0c63e844 | 1687 | /* m5compact-nofpu - more emphasis on code size than on speed, but don't |
1688 | ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. | |
1689 | So use a short shmedia loop. | |
NOTE(review): the conditional that opens this chain tests the same __SHMEDIA__ expression, so this branch cannot be selected as written - confirm whether a "defined" test was intended. */ | |
1690 | // clobbered: r20,r21,r25,tr0,tr1,tr2 | |
1691 | .mode SHmedia | |
1692 | .section .text..SHmedia32,"ax" | |
1693 | .align 2 | |
1694 | GLOBAL(udivsi3): /* Unsigned 32-bit divide by a 32-step shift-and-subtract loop: r4 / r5 -> r0. */ | |
1695 | pt/l LOCAL(udivsi3_dontsub), tr0 | |
1696 | pt/l LOCAL(udivsi3_loop), tr1 | |
1697 | ptabs/l r18,tr2 /* tr2 = return address taken from r18 */ | |
1698 | shlli r5,32,r25 /* divisor moved to the upper 32 bits; the low half of r25 doubles as the loop counter */ | |
1699 | addi r25,-1,r21 /* r21 = (divisor << 32) - 1, so subtracting it also adds 1 to the low half */ | |
1700 | addz.l r4,r63,r20 /* zero-extend the dividend to 64 bits */ | |
1701 | LOCAL(udivsi3_loop): | |
1702 | shlli r20,1,r20 | |
1703 | bgeu/u r21,r20,tr0 /* skip the subtraction when r21 >= r20 (unsigned) */ | |
1704 | sub r20,r21,r20 | |
1705 | LOCAL(udivsi3_dontsub): | |
1706 | addi.l r25,-1,r25 /* 32-bit counter: -1, -2, ... */ | |
1707 | bnei r25,-32,tr1 /* loop until the counter reaches -32, i.e. 32 iterations */ | |
1708 | add.l r20,r63,r0 /* r0 = low 32 bits of r20 (the quotient), sign-extended */ | |
1709 | blink tr2,r63 /* return */ | |
273fffd6 | 1710 | #else /* ! __SHMEDIA__ */ |
0c63e844 | 1711 | LOCAL(div8): /* helper: eight div1 steps, the last one in the rts delay slot */ |
1712 | div1 r5,r4 | |
1713 | LOCAL(div7): /* helper: seven div1 steps, the last one in the rts delay slot */ | |
1714 | div1 r5,r4; div1 r5,r4; div1 r5,r4 | |
1715 | div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 | |
1716 | ||
1717 | LOCAL(divx4): /* helper: four div1 steps with a rotcl r0 after each of the first three; every caller puts a rotcl r0 in its bsr delay slot */ | |
1718 | div1 r5,r4; rotcl r0 | |
1719 | div1 r5,r4; rotcl r0 | |
1720 | div1 r5,r4; rotcl r0 | |
1721 | rts; div1 r5,r4 | |
1722 | ||
1723 | GLOBAL(udivsi3): /* Unsigned 32-bit divide with div1 steps: r4 / r5 -> r0. Divisors that fit in 16 bits take the first path, all others go to large_divisor. */ | |
1724 | sts.l pr,@-r15 /* save pr: the bsr calls below overwrite it */ | |
1725 | extu.w r5,r0 | |
1726 | cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */ | |
1727 | #ifdef __sh1__ | |
1728 | bf LOCAL(large_divisor) | |
1729 | #else | |
1730 | bf/s LOCAL(large_divisor) | |
1731 | #endif | |
1732 | div0u /* set up an unsigned division; this is the delay slot of bf/s, or plain fall-through under __sh1__ */ | |
1733 | swap.w r4,r0 | |
1734 | shlr16 r4 /* r4 = upper 16 bits of the dividend */ | |
1735 | bsr LOCAL(div8) | |
1736 | shll16 r5 /* delay slot: move the 16-bit divisor into the upper half of r5 */ | |
1737 | bsr LOCAL(div7) | |
1738 | div1 r5,r4 /* delay slot: together with div7 this makes eight more steps */ | |
1739 | xtrct r4,r0 | |
1740 | xtrct r0,r4 | |
1741 | bsr LOCAL(div8) | |
1742 | swap.w r4,r4 | |
1743 | bsr LOCAL(div7) | |
1744 | div1 r5,r4 | |
1745 | lds.l @r15+,pr /* restore pr */ | |
1746 | xtrct r4,r0 | |
1747 | swap.w r0,r0 | |
1748 | rotcl r0 /* shift in the last quotient bit from T */ | |
1749 | rts | |
1750 | shlr16 r5 /* delay slot of rts: undo the shll16, restoring the divisor in r5 */ | |
1751 | ||
1752 | LOCAL(large_divisor): /* divisor needs more than 16 bits: 16 div1 steps through four divx4 calls */ | |
1753 | #ifdef __sh1__ | |
1754 | div0u /* under __sh1__ the plain bf skipped the div0u above, so it is issued here */ | |
1755 | #endif | |
1756 | mov #0,r0 | |
1757 | xtrct r4,r0 | |
1758 | xtrct r0,r4 | |
1759 | bsr LOCAL(divx4) | |
1760 | rotcl r0 | |
1761 | bsr LOCAL(divx4) | |
1762 | rotcl r0 | |
1763 | bsr LOCAL(divx4) | |
1764 | rotcl r0 | |
1765 | bsr LOCAL(divx4) | |
1766 | rotcl r0 | |
1767 | lds.l @r15+,pr /* restore pr */ | |
1768 | rts | |
1769 | rotcl r0 /* delay slot of rts: shift in the last quotient bit */ | |
a85a2db0 | 1770 | |
805e22b2 | 1771 | ENDFUNC(GLOBAL(udivsi3)) |
87e19636 | 1772 | #endif /* ! __SHMEDIA__ */ |
0c63e844 | 1773 | #endif /* L_udivsi3 */ |
1774 | ||
1775 | #ifdef L_udivdi3 | |
273fffd6 | 1776 | #if __SHMEDIA__ |
0c63e844 | 1777 | .mode SHmedia |
1778 | .section .text..SHmedia32,"ax" | |
1779 | .align 2 | |
1780 | .global GLOBAL(udivdi3) | |
619f47f5 | 1781 | FUNC(GLOBAL(udivdi3)) |
0c63e844 | 1782 | GLOBAL(udivdi3): /* SHmedia unsigned 64-bit divide: dividend in r2, divisor in r3, quotient returned in r2, return address in r18. */ |
59312820 | 1783 | HIDDEN_ALIAS(udivdi3_internal,udivdi3) /* hidden alias; divdi3 in this file branches to it */ |
0c63e844 | 1784 | shlri r3,1,r4 |
1785 | nsb r4,r22 /* r22 = normalisation shift for the divisor */ | |
1786 | shlld r3,r22,r6 /* r6 = normalised divisor */ | |
1787 | shlri r6,49,r5 | |
1788 | movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ | |
1789 | sub r21,r5,r1 | |
1790 | mmulfx.w r1,r1,r4 | |
1791 | mshflo.w r1,r63,r1 | |
1792 | sub r63,r22,r20 // r63 == 64 % 64 | |
1793 | mmulfx.w r5,r4,r4 | |
1794 | pta LOCAL(large_divisor),tr0 | |
1795 | addi r20,32,r9 | |
1796 | msub.w r1,r4,r1 | |
1797 | madd.w r1,r1,r1 | |
1798 | mmulfx.w r1,r1,r4 | |
1799 | shlri r6,32,r7 /* r7 = upper 32 bits of the normalised divisor */ | |
1800 | bgt/u r9,r63,tr0 // large_divisor | |
1801 | mmulfx.w r5,r4,r4 | |
79c2c2aa | 1802 | shlri r2,32+14,r19 |
1803 | addi r22,-31,r0 | |
0c63e844 | 1804 | msub.w r1,r4,r1 |
1805 | ||
1806 | mulu.l r1,r7,r4 | |
1807 | addi r1,-3,r5 | |
1808 | mulu.l r5,r19,r5 | |
79c2c2aa | 1809 | sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 |
0c63e844 | 1810 | shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as |
1811 | the case may be, %0000000000000000 000.11111111111, still */ | |
1812 | muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
79c2c2aa | 1813 | mulu.l r5,r3,r8 |
0c63e844 | 1814 | mshalds.l r1,r21,r1 |
1815 | shari r4,26,r4 | |
79c2c2aa | 1816 | shlld r8,r0,r8 |
1817 | add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | |
1818 | sub r2,r8,r2 | |
0c63e844 | 1819 | /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ |
1820 | ||
1821 | shlri r2,22,r21 | |
1822 | mulu.l r21,r1,r21 | |
79c2c2aa | 1823 | shlld r5,r0,r8 /* r8 accumulates the quotient from here on */ |
0c63e844 | 1824 | addi r20,30-22,r0 |
0c63e844 | 1825 | shlrd r21,r0,r21 |
1826 | mulu.l r21,r3,r5 | |
1827 | add r8,r21,r8 | |
79c2c2aa | 1828 | mcmpgt.l r21,r63,r21 // See Note 1 |
0c63e844 | 1829 | addi r20,30,r0 |
1830 | mshfhi.l r63,r21,r21 | |
1831 | sub r2,r5,r2 | |
1832 | andc r2,r21,r2 | |
1833 | ||
1834 | /* small divisor: need a third divide step */ | |
1835 | mulu.l r2,r1,r7 | |
1836 | ptabs r18,tr0 /* tr0 = return address taken from r18 */ | |
1837 | addi r2,1,r2 | |
1838 | shlrd r7,r0,r7 | |
1839 | mulu.l r7,r3,r5 | |
1840 | add r8,r7,r8 | |
1841 | sub r2,r3,r2 | |
1842 | cmpgt r2,r5,r5 /* r5 = 0 or 1: last-place correction */ | |
1843 | add r8,r5,r2 /* r2 = final quotient */ | |
1844 | /* could test r3 here to check for divide by zero. */ | |
1845 | blink tr0,r63 /* return */ | |
1846 | ||
1847 | LOCAL(large_divisor): /* taken from the bgt/u above when r9 > 0 */ | |
1848 | mmulfx.w r5,r4,r4 | |
1849 | shlrd r2,r9,r25 | |
1850 | shlri r25,32,r8 | |
1851 | msub.w r1,r4,r1 | |
1852 | ||
1853 | mulu.l r1,r7,r4 | |
1854 | addi r1,-3,r5 | |
1855 | mulu.l r5,r8,r5 | |
79c2c2aa | 1856 | sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 |
0c63e844 | 1857 | shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as |
1858 | the case may be, %0000000000000000 000.11111111111, still */ | |
1859 | muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
79c2c2aa | 1860 | shlri r5,14-1,r8 |
0c63e844 | 1861 | mulu.l r8,r7,r5 |
1862 | mshalds.l r1,r21,r1 | |
1863 | shari r4,26,r4 | |
79c2c2aa | 1864 | add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) |
0c63e844 | 1865 | sub r25,r5,r25 |
1866 | /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ | |
1867 | ||
1868 | shlri r25,22,r21 | |
1869 | mulu.l r21,r1,r21 | |
1870 | pta LOCAL(no_lo_adj),tr0 | |
1871 | addi r22,32,r0 | |
1872 | shlri r21,40,r21 | |
1873 | mulu.l r21,r7,r5 | |
1874 | add r8,r21,r8 | |
1875 | shlld r2,r0,r2 | |
1876 | sub r25,r5,r25 | |
79c2c2aa | 1877 | bgtu/u r7,r25,tr0 // no_lo_adj |
0c63e844 | 1878 | addi r8,1,r8 |
79c2c2aa | 1879 | sub r25,r7,r25 |
0c63e844 | 1880 | LOCAL(no_lo_adj): |
79c2c2aa | 1881 | mextr4 r2,r25,r2 |
0c63e844 | 1882 | |
1883 | /* large_divisor: only needs a few adjustments. */ | |
1884 | mulu.l r8,r6,r5 | |
1885 | ptabs r18,tr0 /* tr0 = return address taken from r18 */ | |
1886 | /* bubble */ | |
1887 | cmpgtu r5,r2,r5 /* r5 = 0 or 1: the estimate in r8 was one too high */ | |
1888 | sub r8,r5,r2 /* r2 = final quotient */ | |
1889 | blink tr0,r63 /* return */ | |
619f47f5 | 1890 | ENDFUNC(GLOBAL(udivdi3)) |
0c63e844 | 1891 | /* Note 1: To shift the result of the second divide stage so that the result |
1892 | always fits into 32 bits, yet we still reduce the rest sufficiently | |
1893 | would require a lot of instructions to do the shifts just right. Using | |
1894 | the full 64 bit shift result to multiply with the divisor would require | |
1895 | four extra instructions for the upper 32 bits (shift / mulu / shift / sub). | |
808a491c | 1896 | Fortunately, if the upper 32 bits of the shift result are nonzero, we |
0c63e844 | 1897 | know that the rest after taking this partial result into account will |
1898 | fit into 32 bits. So we just clear the upper 32 bits of the rest if the | |
808a491c | 1899 | upper 32 bits of the partial result are nonzero. */ |
0c63e844 | 1900 | #endif /* __SHMEDIA__ */ |
1901 | #endif /* L_udivdi3 */ | |
1902 | ||
1903 | #ifdef L_divdi3 | |
273fffd6 | 1904 | #if __SHMEDIA__ |
0c63e844 | 1905 | .mode SHmedia |
1906 | .section .text..SHmedia32,"ax" | |
1907 | .align 2 | |
1908 | .global GLOBAL(divdi3) | |
619f47f5 | 1909 | FUNC(GLOBAL(divdi3)) |
0c63e844 | 1910 | GLOBAL(divdi3): /* SHmedia signed 64-bit divide: take absolute values of r2 and r3, divide through udivdi3_internal, negate the quotient in r2 when the signs differ. */ |
59312820 | 1911 | pta GLOBAL(udivdi3_internal),tr0 |
0c63e844 | 1912 | shari r2,63,r22 /* r22 = 0 or -1: sign of the dividend */ |
1913 | shari r3,63,r23 /* r23 = 0 or -1: sign of the divisor */ | |
1914 | xor r2,r22,r2 | |
1915 | xor r3,r23,r3 | |
1916 | sub r2,r22,r2 /* r2 = absolute value of the dividend */ | |
1917 | sub r3,r23,r3 /* r3 = absolute value of the divisor */ | |
1918 | beq/u r22,r23,tr0 /* same sign: branch straight into the unsigned divide, which returns to the caller through r18 */ | |
1919 | ptabs r18,tr1 /* signs differ: keep our own return address in tr1 */ | |
1920 | blink tr0,r18 /* call the unsigned divide, linking through r18 */ | |
1921 | sub r63,r2,r2 /* negate the quotient */ | |
1922 | blink tr1,r63 /* return */ | |
619f47f5 | 1923 | ENDFUNC(GLOBAL(divdi3)) |
0c63e844 | 1924 | #endif /* __SHMEDIA__ */ |
1925 | #endif /* L_divdi3 */ | |
1926 | ||
1927 | #ifdef L_umoddi3 | |
273fffd6 | 1928 | #if __SHMEDIA__ |
0c63e844 | 1929 | .mode SHmedia |
1930 | .section .text..SHmedia32,"ax" |
1931 | .align 2 |
1932 | .global GLOBAL(umoddi3) |
619f47f5 | 1933 | FUNC(GLOBAL(umoddi3)) |
0c63e844 | 1934 | GLOBAL(umoddi3): /* Unsigned 64-bit remainder: dividend in r2, divisor in r3, result left in r2. Works from a reciprocal estimate kept in r1; return address is taken from r18. */ |
59312820 | 1935 | HIDDEN_ALIAS(umoddi3_internal,umoddi3) /* umoddi3_internal is the name moddi3 branches to */ |
0c63e844 | 1936 | shlri r3,1,r4 |
1937 | nsb r4,r22 |
1938 | shlld r3,r22,r6 /* r6 = divisor shifted left by the count in r22 */ |
1939 | shlri r6,49,r5 |
1940 | movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ |
1941 | sub r21,r5,r1 |
1942 | mmulfx.w r1,r1,r4 |
1943 | mshflo.w r1,r63,r1 |
1944 | sub r63,r22,r20 // r63 == 64 % 64 |
1945 | mmulfx.w r5,r4,r4 |
1946 | pta LOCAL(large_divisor),tr0 |
1947 | addi r20,32,r9 |
1948 | msub.w r1,r4,r1 |
1949 | madd.w r1,r1,r1 |
1950 | mmulfx.w r1,r1,r4 |
1951 | shlri r6,32,r7 /* r7 = upper 32 bits of the shifted divisor */ |
1952 | bgt/u r9,r63,tr0 // large_divisor |
1953 | mmulfx.w r5,r4,r4 |
79c2c2aa | 1954 | shlri r2,32+14,r19 |
1955 | addi r22,-31,r0 |
0c63e844 | 1956 | msub.w r1,r4,r1 |
1957 | |
1958 | mulu.l r1,r7,r4 |
1959 | addi r1,-3,r5 |
1960 | mulu.l r5,r19,r5 |
79c2c2aa | 1961 | sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 |
0c63e844 | 1962 | shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as |
1963 | the case may be, %0000000000000000 000.11111111111, still */ |
1964 | muls.l r1,r4,r4 /* leaving at least one sign bit. */ |
79c2c2aa | 1965 | mulu.l r5,r3,r5 |
0c63e844 | 1966 | mshalds.l r1,r21,r1 |
1967 | shari r4,26,r4 |
79c2c2aa | 1968 | shlld r5,r0,r5 |
1969 | add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) |
0c63e844 | 1970 | sub r2,r5,r2 |
1971 | /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ |
1972 | |
1973 | shlri r2,22,r21 |
1974 | mulu.l r21,r1,r21 |
1975 | addi r20,30-22,r0 |
1976 | /* bubble */ /* could test r3 here to check for divide by zero. */ |
1977 | shlrd r21,r0,r21 |
1978 | mulu.l r21,r3,r5 |
79c2c2aa | 1979 | mcmpgt.l r21,r63,r21 // See Note 1 |
0c63e844 | 1980 | addi r20,30,r0 |
1981 | mshfhi.l r63,r21,r21 |
1982 | sub r2,r5,r2 |
1983 | andc r2,r21,r2 |
1984 | |
1985 | /* small divisor: need a third divide step */ |
1986 | mulu.l r2,r1,r7 |
1987 | ptabs r18,tr0 /* tr0 = return address */ |
1988 | sub r2,r3,r8 /* re-use r8 here for rest - r3 */ |
1989 | shlrd r7,r0,r7 |
1990 | mulu.l r7,r3,r5 |
1991 | /* bubble */ |
1992 | addi r8,1,r7 |
1993 | cmpgt r7,r5,r7 |
1994 | cmvne r7,r8,r2 /* when the compare above is true, continue with rest - r3 (r8) instead of rest */ |
1995 | sub r2,r5,r2 |
1996 | blink tr0,r63 /* return with the remainder in r2 */ |
1997 | |
1998 | LOCAL(large_divisor): |
1999 | mmulfx.w r5,r4,r4 |
2000 | shlrd r2,r9,r25 |
2001 | shlri r25,32,r8 |
2002 | msub.w r1,r4,r1 |
2003 | |
2004 | mulu.l r1,r7,r4 |
2005 | addi r1,-3,r5 |
2006 | mulu.l r5,r8,r5 |
79c2c2aa | 2007 | sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 |
0c63e844 | 2008 | shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as |
2009 | the case may be, %0000000000000000 000.11111111111, still */ |
2010 | muls.l r1,r4,r4 /* leaving at least one sign bit. */ |
79c2c2aa | 2011 | shlri r5,14-1,r8 |
0c63e844 | 2012 | mulu.l r8,r7,r5 |
2013 | mshalds.l r1,r21,r1 |
2014 | shari r4,26,r4 |
79c2c2aa | 2015 | add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) |
0c63e844 | 2016 | sub r25,r5,r25 |
2017 | /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ |
2018 | |
2019 | shlri r25,22,r21 |
2020 | mulu.l r21,r1,r21 |
2021 | pta LOCAL(no_lo_adj),tr0 |
2022 | addi r22,32,r0 |
2023 | shlri r21,40,r21 |
2024 | mulu.l r21,r7,r5 |
2025 | add r8,r21,r8 |
2026 | shlld r2,r0,r2 |
2027 | sub r25,r5,r25 |
79c2c2aa | 2028 | bgtu/u r7,r25,tr0 // no_lo_adj |
0c63e844 | 2029 | addi r8,1,r8 |
79c2c2aa | 2030 | sub r25,r7,r25 |
0c63e844 | 2031 | LOCAL(no_lo_adj): |
79c2c2aa | 2032 | mextr4 r2,r25,r2 |
0c63e844 | 2033 | |
2034 | /* large_divisor: only needs a few adjustments. */ |
2035 | mulu.l r8,r6,r5 |
2036 | ptabs r18,tr0 /* tr0 = return address */ |
79c2c2aa | 2037 | add r2,r6,r7 /* r7 = rest + shifted divisor, used if the product in r5 exceeds the rest */ |
0c63e844 | 2038 | cmpgtu r5,r2,r8 |
2039 | cmvne r8,r7,r2 |
2040 | sub r2,r5,r2 |
79c2c2aa | 2041 | shlrd r2,r22,r2 /* shift right by r22, the same count the divisor was shifted left by on entry */ |
0c63e844 | 2042 | blink tr0,r63 /* return with the remainder in r2 */ |
619f47f5 | 2043 | ENDFUNC(GLOBAL(umoddi3)) |
0c63e844 | 2044 | /* Note 1: To shift the result of the second divide stage so that the result |
2045 | always fits into 32 bits, yet we still reduce the rest sufficiently | |
2046 | would require a lot of instructions to do the shifts just right. Using | |
2047 | the full 64 bit shift result to multiply with the divisor would require | |
2048 | four extra instructions for the upper 32 bits (shift / mulu / shift / sub). | |
808a491c | 2049 | Fortunately, if the upper 32 bits of the shift result are nonzero, we |
0c63e844 | 2050 | know that the rest after taking this partial result into account will |
2051 | fit into 32 bits. So we just clear the upper 32 bits of the rest if the | |
808a491c | 2052 | upper 32 bits of the partial result are nonzero. */ |
0c63e844 | 2053 | #endif /* __SHMEDIA__ */ |
2054 | #endif /* L_umoddi3 */ | |
2055 | ||
2056 | #ifdef L_moddi3 | |
273fffd6 | 2057 | #if __SHMEDIA__ |
0c63e844 | 2058 | .mode SHmedia |
2059 | .section .text..SHmedia32,"ax" |
2060 | .align 2 |
2061 | .global GLOBAL(moddi3) |
619f47f5 | 2062 | FUNC(GLOBAL(moddi3)) |
0c63e844 | 2063 | GLOBAL(moddi3): /* Signed 64-bit remainder of r2 by r3: runs umoddi3_internal on the absolute values and negates the result in r2 when the dividend was negative. */ |
59312820 | 2064 | pta GLOBAL(umoddi3_internal),tr0 |
0c63e844 | 2065 | shari r2,63,r22 /* r22 = sign mask of the dividend (0 or -1) */ |
2066 | shari r3,63,r23 /* r23 = sign mask of the divisor (0 or -1) */ |
2067 | xor r2,r22,r2 |
2068 | xor r3,r23,r3 |
2069 | sub r2,r22,r2 /* (x ^ mask) - mask: r2 = abs(dividend) */ |
2070 | sub r3,r23,r3 /* likewise r3 = abs(divisor) */ |
2071 | beq/u r22,r63,tr0 /* dividend not negative: only its sign decides the result sign, so branch to the unsigned routine without linking */ |
2072 | ptabs r18,tr1 /* keep our own return address in tr1 */ |
2073 | blink tr0,r18 /* call umoddi3_internal, linking through r18 */ |
2074 | sub r63,r2,r2 /* dividend was negative: negate the remainder */ |
2075 | blink tr1,r63 /* return to our caller */ |
619f47f5 | 2076 | ENDFUNC(GLOBAL(moddi3)) |
0c63e844 | 2077 | #endif /* __SHMEDIA__ */ |
2078 | #endif /* L_moddi3 */ | |
2079 | ||
d73f1571 | 2080 | #ifdef L_set_fpscr |
7105fb72 | 2081 | #if !defined (__SH2A_NOFPU__) |
2082 | #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 | |
87e19636 | 2083 | #ifdef __SH5__ |
2084 | .mode SHcompact | |
2085 | #endif | |
16f1dae0 | 2086 | .global GLOBAL(set_fpscr) |
59312820 | 2087 | HIDDEN_FUNC(GLOBAL(set_fpscr)) |
16f1dae0 | 2088 | GLOBAL(set_fpscr): /* Loads r4 into FPSCR and stores two variants of that value, differing in bits 19/20, into the two words of fpscr_values. */ |
d73f1571 | 2089 | lds r4,fpscr /* install the caller-supplied value */ |
619f47f5 | 2090 | #ifdef __PIC__ |
2091 | mov.l r12,@-r15 /* push r12; it is used as a scratch base register below */ |
a9cfe83b | 2092 | #ifdef __vxworks |
2093 | mov.l LOCAL(set_fpscr_L0_base),r12 |
2094 | mov.l LOCAL(set_fpscr_L0_index),r0 |
2095 | mov.l @r12,r12 |
2096 | mov.l @(r0,r12),r12 /* r12 = entry selected by ___GOTT_INDEX__ in the table pointed to by ___GOTT_BASE__ */ |
2097 | #else |
619f47f5 | 2098 | mova LOCAL(set_fpscr_L0),r0 |
2099 | mov.l LOCAL(set_fpscr_L0),r12 |
2100 | add r0,r12 /* r12 = address of set_fpscr_L0 plus the _GLOBAL_OFFSET_TABLE_ word stored there */ |
a9cfe83b | 2101 | #endif |
619f47f5 | 2102 | mov.l LOCAL(set_fpscr_L1),r0 /* r0 = fpscr_values@GOT offset */ |
2103 | mov.l @(r0,r12),r1 /* r1 = address of fpscr_values, read from the GOT slot */ |
2104 | mov.l @r15+,r12 /* pop r12 */ |
2105 | #else |
16f1dae0 | 2106 | mov.l LOCAL(set_fpscr_L1),r1 /* r1 = address of fpscr_values */ |
619f47f5 | 2107 | #endif |
d73f1571 | 2108 | swap.w r4,r0 /* swap the 16-bit halves so bits 16-31 can be reached by the 8-bit immediates below */ |
2109 | or #24,r0 /* set bits 3 and 4 of the swapped value, i.e. bits 19 and 20 of the FPSCR image */ |
1b61190c | 2110 | #ifndef FMOVD_WORKS |
d73f1571 | 2111 | xor #16,r0 /* clear bit 20 again */ |
1b61190c | 2112 | #endif |
7105fb72 | 2113 | #if defined(__SH4__) || defined (__SH2A_DOUBLE__) |
1b61190c | 2114 | swap.w r0,r3 |
2115 | mov.l r3,@(4,r1) /* first variant goes to the second word of fpscr_values */ |
87ed74ef | 2116 | #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ |
d73f1571 | 2117 | swap.w r0,r2 |
2118 | mov.l r2,@r1 /* first variant goes to the first word of fpscr_values */ |
1b61190c | 2119 | #endif |
2120 | #ifndef FMOVD_WORKS |
d73f1571 | 2121 | xor #8,r0 /* flip bit 19 for the second variant */ |
1b61190c | 2122 | #else |
2123 | xor #24,r0 /* flip bits 19 and 20 for the second variant */ |
2124 | #endif |
7105fb72 | 2125 | #if defined(__SH4__) || defined (__SH2A_DOUBLE__) |
1b61190c | 2126 | swap.w r0,r2 |
2127 | rts |
2128 | mov.l r2,@r1 /* delay slot: second variant goes to the first word */ |
87ed74ef | 2129 | #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ |
d73f1571 | 2130 | swap.w r0,r3 |
2131 | rts |
2132 | mov.l r3,@(4,r1) /* delay slot: second variant goes to the second word */ |
1b61190c | 2133 | #endif |
d73f1571 | 2134 | .align 2 |
619f47f5 | 2135 | #ifdef __PIC__ |
a9cfe83b | 2136 | #ifdef __vxworks |
2137 | LOCAL(set_fpscr_L0_base): |
2138 | .long ___GOTT_BASE__ |
2139 | LOCAL(set_fpscr_L0_index): |
2140 | .long ___GOTT_INDEX__ |
2141 | #else |
619f47f5 | 2142 | LOCAL(set_fpscr_L0): |
2143 | .long _GLOBAL_OFFSET_TABLE_ |
a9cfe83b | 2144 | #endif |
619f47f5 | 2145 | LOCAL(set_fpscr_L1): |
2146 | .long GLOBAL(fpscr_values@GOT) |
2147 | #else |
16f1dae0 | 2148 | LOCAL(set_fpscr_L1): |
2149 | .long GLOBAL(fpscr_values) |
619f47f5 | 2150 | #endif |
805e22b2 | 2151 | |
2152 | ENDFUNC(GLOBAL(set_fpscr)) |
c03bb5e0 | 2153 | #ifndef NO_FPSCR_VALUES |
d73f1571 | 2154 | #ifdef __ELF__ |
16f1dae0 | 2155 | .comm GLOBAL(fpscr_values),8,4 |
d73f1571 | 2156 | #else |
16f1dae0 | 2157 | .comm GLOBAL(fpscr_values),8 |
d73f1571 | 2158 | #endif /* ELF */ |
c03bb5e0 | 2159 | #endif /* NO_FPSCR_VALUES */ |
87ed74ef | 2160 | #endif /* SH2E / SH3E / SH4 */ |
7105fb72 | 2161 | #endif /* __SH2A_NOFPU__ */ |
d73f1571 | 2162 | #endif /* L_set_fpscr */ |
4e734737 | 2163 | #ifdef L_ic_invalidate |
87e19636 | 2164 | #if __SH5__ == 32 |
2165 | .mode SHmedia |
2166 | .section .text..SHmedia32,"ax" |
2167 | .align 2 |
e40c2d35 | 2168 | .global GLOBAL(init_trampoline) |
59312820 | 2169 | HIDDEN_FUNC(GLOBAL(init_trampoline)) |
e40c2d35 | 2170 | GLOBAL(init_trampoline): /* Fills the 16-byte area at r0: a fixed 64-bit pattern at offset 0, r2 at offset 8, r3 at offset 12; then runs straight on into ic_invalidate below. */ |
2171 | st.l r0,8,r2 /* 32-bit store of r2 at r0 + 8 */ |
2172 | #ifdef __LITTLE_ENDIAN__ |
2173 | movi 9,r20 |
2174 | shori 0x402b,r20 |
2175 | shori 0xd101,r20 |
2176 | shori 0xd002,r20 |
2177 | #else |
2178 | movi 0xffffffffffffd002,r20 /* same four 16-bit pieces as the little-endian case, in the opposite order */ |
2179 | shori 0xd101,r20 |
2180 | shori 0x402b,r20 |
2181 | shori 9,r20 |
2182 | #endif |
2183 | st.q r0,0,r20 /* 64-bit store of the pattern at r0 + 0; presumably four 16-bit SHcompact opcodes - TODO confirm against the trampoline layout in sh.c */ |
2184 | st.l r0,12,r3 /* 32-bit store of r3 at r0 + 12 */ |
59312820 | 2185 | ENDFUNC(GLOBAL(init_trampoline)) /* no return instruction above: execution continues into ic_invalidate */ |
87e19636 | 2186 | .global GLOBAL(ic_invalidate) |
59312820 | 2187 | HIDDEN_FUNC(GLOBAL(ic_invalidate)) |
87e19636 | 2188 | GLOBAL(ic_invalidate): /* Cache maintenance for the block addressed by r0, then return through r18. */ |
0c63e844 | 2189 | ocbwb r0,0 /* write back the data cache block at r0 + 0 */ |
2190 | synco |
87e19636 | 2191 | icbi r0, 0 /* invalidate the instruction cache block at r0 + 0 */ |
2192 | ptabs r18, tr0 /* tr0 = return address */ |
2193 | synci |
2194 | blink tr0, r63 /* return */ |
619f47f5 | 2195 | ENDFUNC(GLOBAL(ic_invalidate)) |
9435e831 | 2196 | #elif defined(__SH4A__) |
2197 | .global GLOBAL(ic_invalidate) |
59312820 | 2198 | HIDDEN_FUNC(GLOBAL(ic_invalidate)) |
9435e831 | 2199 | GLOBAL(ic_invalidate): /* SH4A variant: cache maintenance for the block addressed by r4, done inline with icbi. */ |
2200 | ocbwb @r4 /* write back the data cache block containing the address in r4 */ |
2201 | synco /* placed between the write-back and the icbi */ |
9435e831 | 2202 | icbi @r4 /* invalidate the instruction cache block containing the address in r4 */ |
85714674 | 2203 | rts |
2204 | nop /* delay slot of rts; the icbi is kept out of it */ |
9435e831 | 2205 | ENDFUNC(GLOBAL(ic_invalidate)) |
59312820 | 2206 | #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) |
2207 | /* For system code, we use ic_invalidate_line_i, but user code | |
2208 | needs a different mechanism. A kernel call is generally not | |
2209 | available, and it would also be slow. Different SH4 variants use | |
2210 | different sizes and associativities of the Icache. We use a small | |
2211 | bit of dispatch code that can be put hidden in every shared object, | |
2212 | which calls the actual processor-specific invalidation code in a | |
2213 | separate module. | |
2214 | Or if you have operating system support, the OS could mmap the | |
2215 | processor-specific code from a single page, since it is highly |
2216 | repetitive. */ | |
4e734737 | 2217 | .global GLOBAL(ic_invalidate) |
59312820 | 2218 | HIDDEN_FUNC(GLOBAL(ic_invalidate)) |
4e734737 | 2219 | GLOBAL(ic_invalidate): /* SH4 dispatcher: r4 = address to invalidate. Finds ic_invalidate_array, reads a mask from offset 8 and an r0 value from offset 4 of it, and jumps to array + ((r4 - array) & mask). */ |
59312820 | 2220 | #ifdef __pic__ |
a9cfe83b | 2221 | #ifdef __vxworks |
2222 | mov.l 1f,r1 |
2223 | mov.l 2f,r0 |
2224 | mov.l @r1,r1 |
2225 | mov.l 0f,r2 /* r2 = ic_invalidate_array@GOT offset */ |
2226 | mov.l @(r0,r1),r0 /* r0 = entry selected by ___GOTT_INDEX__ in the table pointed to by ___GOTT_BASE__ */ |
2227 | #else |
2228 | mov.l 1f,r1 |
2229 | mova 1f,r0 |
2230 | mov.l 0f,r2 /* r2 = ic_invalidate_array@GOT offset */ |
59312820 | 2231 | add r1,r0 /* r0 = address of label 1 plus the _GLOBAL_OFFSET_TABLE_ word stored there */ |
a9cfe83b | 2232 | #endif |
59312820 | 2233 | mov.l @(r0,r2),r1 /* r1 = address of ic_invalidate_array, read from its GOT slot */ |
a9cfe83b | 2234 | #else |
2235 | mov.l 0f,r1 /* r1 = address of ic_invalidate_array */ |
59312820 | 2236 | #endif |
2237 | ocbwb @r4 /* write back the data cache block containing the address in r4 */ |
2238 | mov.l @(8,r1),r0 /* r0 = mask word stored at array + 8 */ |
2239 | sub r1,r4 /* r4 = address - array; the SH4A array entry undoes this with add r1,r4 */ |
2240 | and r4,r0 /* keep only the offset bits allowed by the mask */ |
2241 | add r1,r0 /* r0 = entry point inside the array */ |
2242 | jmp @r0 |
2243 | mov.l @(4,r1),r0 /* delay slot: r0 = word stored at array + 4, consumed by braf r0 in the SH4 array */ |
a819eb1f | 2244 | .align 2 |
59312820 | 2245 | #ifndef __pic__ |
2246 | 0: .long GLOBAL(ic_invalidate_array) |
2247 | #else /* __pic__ */ |
2248 | .global GLOBAL(ic_invalidate_array) |
a9cfe83b | 2249 | 0: .long GLOBAL(ic_invalidate_array)@GOT |
2250 | #ifdef __vxworks |
2251 | 1: .long ___GOTT_BASE__ |
2252 | 2: .long ___GOTT_INDEX__ |
2253 | #else |
2254 | 1: .long _GLOBAL_OFFSET_TABLE_ |
2255 | #endif |
59312820 | 2256 | #endif /* __pic__ */ |
2257 | ENDFUNC(GLOBAL(ic_invalidate)) /* placed after the __pic__ conditional so that non-PIC builds also close the function opened by HIDDEN_FUNC */ |
2258 | #endif /* SH4 */ | |
2259 | #endif /* L_ic_invalidate */ | |
2260 | ||
2261 | #ifdef L_ic_invalidate_array | |
3a6994f8 | 2262 | #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) |
2263 | .global GLOBAL(ic_invalidate_array) /* This is needed when an SH4 dso with trampolines is used on SH4A. */ |
59312820 | 2264 | .align 2 /* The SH4 ic_invalidate dispatcher enters here with r1 = this label and r4 = address - r1, after reading an AND mask from offset 8 (and an unused r0 value from offset 4). The mask must be 0 so that the jump always lands on offset 0, hence the fixed layout below. */ |
2265 | FUNC(GLOBAL(ic_invalidate_array)) |
2266 | GLOBAL(ic_invalidate_array): |
2267 | add r1,r4 /* offset 0: undo the dispatcher's sub r1,r4, so r4 is the address again */ |
2268 | synco /* offset 2 */ |
2269 | bra 1f /* offset 4: hop over the mask word */ |
59312820 | 2270 | nop /* offset 6: delay slot of bra */ |
85714674 | 2271 | .long 0 /* offset 8: mask read by the dispatcher; zero selects offset 0 */ |
2272 | 1: icbi @r4 /* invalidate the instruction cache block; kept out of any delay slot */ |
2273 | rts |
59312820 | 2274 | nop |
2275 | ENDFUNC(GLOBAL(ic_invalidate_array)) |
2276 | #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) | |
2277 | .global GLOBAL(ic_invalidate_array) |
8110cc37 | 2278 | .p2align 5 |
59312820 | 2279 | FUNC(GLOBAL(ic_invalidate_array)) |
8110cc37 | 2280 | /* This must be aligned to the beginning of a cache line. */ |
59312820 | 2281 | GLOBAL(ic_invalidate_array): /* Array of 32-byte groups entered by the SH4 ic_invalidate dispatcher, which reads the word at offset 4 into r0 and the word at offset 8 as its AND mask. */ |
2282 | #ifndef WAYS |
2283 | #define WAYS 4 |
2284 | #define WAY_SIZE 0x4000 |
2285 | #endif |
2286 | #if WAYS == 1 |
2287 | .rept WAY_SIZE * WAYS / 32 /* each repetition emits 4 + 7 * 4 = 32 bytes */ |
2288 | rts |
2289 | nop |
2290 | .rept 7 |
2291 | .long WAY_SIZE - 32 /* the copy at offset 8 of the first group is the dispatcher's mask */ |
2292 | .endr |
2293 | .endr |
2294 | #elif WAYS <= 6 |
2295 | .rept WAY_SIZE * WAYS / 32 /* each repetition emits 4 + 8 + 4 * (WAYS-2) + 4 * (7-WAYS) = 32 bytes */ |
2296 | braf r0 /* pc-relative branch by r0 */ |
2297 | add #-8,r0 /* delay slot: reduce r0 by 8 for the following braf */ |
2298 | .long WAY_SIZE + 8 /* offset 4 of the first group: initial r0 loaded by the dispatcher */ |
2299 | .long WAY_SIZE - 32 /* offset 8 of the first group: the dispatcher's mask */ |
2300 | .rept WAYS-2 |
2301 | braf r0 |
2302 | nop |
2303 | .endr |
2304 | .rept 7 - WAYS |
2305 | rts |
2306 | nop |
2307 | .endr |
2308 | .endr |
2309 | #else /* WAYS > 6 */ |
2310 | /* This variant needs two different pages for mmap-ing. */ |
2311 | .rept WAYS-1 |
2312 | .rept WAY_SIZE / 32 /* each repetition emits 4 + 4 + 6 * 4 = 32 bytes */ |
2313 | braf r0 |
2314 | nop |
2315 | .long WAY_SIZE /* offset 4 of the first group: r0 loaded by the dispatcher */ |
2316 | .rept 6 |
2317 | .long WAY_SIZE - 32 /* the copy at offset 8 of the first group is the dispatcher's mask */ |
2318 | .endr |
2319 | .endr |
2320 | .endr |
2321 | .rept WAY_SIZE / 32 /* final stretch: each repetition is rts plus 15 nops = 32 bytes */ |
4e734737 | 2322 | rts |
8110cc37 | 2323 | .rept 15 |
4e734737 | 2324 | nop |
2325 | .endr |
8110cc37 | 2326 | .endr |
59312820 | 2327 | #endif /* WAYS */ |
2328 | ENDFUNC(GLOBAL(ic_invalidate_array)) |
4e734737 | 2329 | #endif /* SH4 */ |
59312820 | 2330 | #endif /* L_ic_invalidate_array */ |
87e19636 | 2331 | |
2332 | #if defined (__SH5__) && __SH5__ == 32 | |
2333 | #ifdef L_shcompact_call_trampoline | |
2334 | .section .rodata |
2335 | .align 1 |
2336 | LOCAL(ct_main_table): /* Dispatch table for GCC_shcompact_call_trampoline: 33 signed 16-bit offsets from ct_main_label, one per handler. The trampoline indexes it with twice the nsb result of the cookie, using a base address biased by -31 * 2. */ |
2337 | .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) |
2338 | .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) |
2339 | .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) |
2340 | .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) |
2341 | .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) |
2342 | .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) |
2343 | .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) |
2344 | .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) |
2345 | .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) |
2346 | .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) |
2347 | .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) |
2348 | .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) |
2349 | .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) |
2350 | .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) |
2351 | .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) |
2352 | .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) |
2353 | .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) |
2354 | .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) |
2355 | .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) |
2356 | .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) |
2357 | .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) |
2358 | .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) |
2359 | .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) |
2360 | .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) |
2361 | .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) |
2362 | .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) |
2363 | .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) |
2364 | .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) |
2365 | .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) |
2366 | .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) |
2367 | .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) |
2368 | .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) /* second-to-last entry: wide-return path */ |
2369 | .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) /* last entry: plain branch to the function */ |
2370 | .mode SHmedia | |
2371 | .section .text..SHmedia32, "ax" | |
2372 | .align 2 | |
2373 | ||
2374 | /* This function loads 64-bit general-purpose registers from the | |
2375 | stack, from a memory address contained in them or from an FP | |
2376 | register, according to a cookie passed in r1. Its execution | |
2377 | time is linear in the number of registers that actually have |
2378 | to be copied. See sh.h for details on the actual bit pattern. | |
2379 | ||
2380 | The function to be called is passed in r0. If a 32-bit return | |
2381 | value is expected, the actual function will be tail-called, | |
2382 | otherwise the return address will be stored in r10 (that the | |
2383 | caller should expect to be clobbered) and the return value | |
2384 | will be expanded into r2/r3 upon return. */ | |
2385 | ||
2386 | .global GLOBAL(GCC_shcompact_call_trampoline) |
805e22b2 | 2387 | FUNC(GLOBAL(GCC_shcompact_call_trampoline)) |
87e19636 | 2388 | GLOBAL(GCC_shcompact_call_trampoline): /* Entry: r0 = function to call, r1 = cookie. tr0 keeps the function, tr1 points at ct_loop, r0 is reused as the biased table base. */ |
2389 | ptabs/l r0, tr0 /* Prepare to call the actual function. */ |
2390 | movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 /* upper 16 bits of ct_main_table - 62 */ |
2391 | pt/l LOCAL(ct_loop), tr1 /* every handler comes back here through tr1 */ |
2392 | addz.l r1, r63, r1 /* zero-extend the low 32 bits of the cookie */ |
2393 | shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 /* lower 16 bits; r0 = ct_main_table - 62 */ |
2394 | LOCAL(ct_loop): /* One pass per remaining cookie field; handlers clear their field in r1 before returning here. */ |
2395 | nsb r1, r28 /* NOTE(review): nsb result assumed to be at least 31 for a zero-extended 32-bit value, which is what the -31 * 2 bias relies on - confirm */ |
2396 | shlli r28, 1, r29 /* scale to a 2-byte table index */ |
2397 | ldx.w r0, r29, r30 /* r30 = 16-bit table entry (handler - ct_main_label) */ |
2398 | LOCAL(ct_main_label): |
2399 | ptrel/l r30, tr2 /* target is relative to this instruction, matching the table entries */ |
2400 | blink tr2, r63 /* jump to the selected handler without linking */ |
2401 | LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */ | |
2402 | /* It must be dr0, so just do it. */ | |
2403 | fmov.dq dr0, r2 | |
2404 | movi 7, r30 | |
2405 | shlli r30, 29, r31 | |
2406 | andc r1, r31, r1 | |
2407 | blink tr1, r63 | |
2408 | LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ | |
2409 | /* It is either dr0 or dr2. */ | |
2410 | movi 7, r30 | |
2411 | shlri r1, 26, r32 | |
2412 | shlli r30, 26, r31 | |
2413 | andc r1, r31, r1 | |
2414 | fmov.dq dr0, r3 | |
2415 | beqi/l r32, 4, tr1 | |
2416 | fmov.dq dr2, r3 | |
2417 | blink tr1, r63 | |
2418 | LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ | |
2419 | shlri r1, 23 - 3, r34 | |
2420 | andi r34, 3 << 3, r33 | |
2421 | addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 | |
2422 | LOCAL(ct_r4_fp_base): | |
2423 | ptrel/l r32, tr2 | |
2424 | movi 7, r30 | |
2425 | shlli r30, 23, r31 | |
2426 | andc r1, r31, r1 | |
2427 | blink tr2, r63 | |
2428 | LOCAL(ct_r4_fp_copy): | |
2429 | fmov.dq dr0, r4 | |
2430 | blink tr1, r63 | |
2431 | fmov.dq dr2, r4 | |
2432 | blink tr1, r63 | |
2433 | fmov.dq dr4, r4 | |
2434 | blink tr1, r63 | |
2435 | LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ | |
2436 | shlri r1, 20 - 3, r34 | |
2437 | andi r34, 3 << 3, r33 | |
2438 | addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 | |
2439 | LOCAL(ct_r5_fp_base): | |
2440 | ptrel/l r32, tr2 | |
2441 | movi 7, r30 | |
2442 | shlli r30, 20, r31 | |
2443 | andc r1, r31, r1 | |
2444 | blink tr2, r63 | |
2445 | LOCAL(ct_r5_fp_copy): | |
2446 | fmov.dq dr0, r5 | |
2447 | blink tr1, r63 | |
2448 | fmov.dq dr2, r5 | |
2449 | blink tr1, r63 | |
2450 | fmov.dq dr4, r5 | |
2451 | blink tr1, r63 | |
2452 | fmov.dq dr6, r5 | |
2453 | blink tr1, r63 | |
2454 | LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ | |
2455 | /* It must be dr8. */ | |
2456 | fmov.dq dr8, r6 | |
2457 | movi 15, r30 | |
2458 | shlli r30, 16, r31 | |
2459 | andc r1, r31, r1 | |
2460 | blink tr1, r63 | |
2461 | LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ | |
2462 | shlri r1, 16 - 3, r34 | |
2463 | andi r34, 3 << 3, r33 | |
2464 | addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 | |
2465 | LOCAL(ct_r6_fp_base): | |
2466 | ptrel/l r32, tr2 | |
2467 | movi 7, r30 | |
2468 | shlli r30, 16, r31 | |
2469 | andc r1, r31, r1 | |
2470 | blink tr2, r63 | |
2471 | LOCAL(ct_r6_fp_copy): | |
2472 | fmov.dq dr0, r6 | |
2473 | blink tr1, r63 | |
2474 | fmov.dq dr2, r6 | |
2475 | blink tr1, r63 | |
2476 | fmov.dq dr4, r6 | |
2477 | blink tr1, r63 | |
2478 | fmov.dq dr6, r6 | |
2479 | blink tr1, r63 | |
2480 | LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ | |
2481 | /* It is either dr8 or dr10. */ | |
2482 | movi 15 << 12, r31 | |
2483 | shlri r1, 12, r32 | |
2484 | andc r1, r31, r1 | |
2485 | fmov.dq dr8, r7 | |
2486 | beqi/l r32, 8, tr1 | |
2487 | fmov.dq dr10, r7 | |
2488 | blink tr1, r63 | |
2489 | LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ | |
2490 | shlri r1, 12 - 3, r34 | |
2491 | andi r34, 3 << 3, r33 | |
2492 | addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 | |
2493 | LOCAL(ct_r7_fp_base): | |
2494 | ptrel/l r32, tr2 | |
2495 | movi 7 << 12, r31 | |
2496 | andc r1, r31, r1 | |
2497 | blink tr2, r63 | |
2498 | LOCAL(ct_r7_fp_copy): | |
2499 | fmov.dq dr0, r7 | |
2500 | blink tr1, r63 | |
2501 | fmov.dq dr2, r7 | |
2502 | blink tr1, r63 | |
2503 | fmov.dq dr4, r7 | |
2504 | blink tr1, r63 | |
2505 | fmov.dq dr6, r7 | |
2506 | blink tr1, r63 | |
2507 | LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ | |
2508 | /* It is either dr8 or dr10. */ | |
2509 | movi 15 << 8, r31 | |
2510 | andi r1, 1 << 8, r32 | |
2511 | andc r1, r31, r1 | |
2512 | fmov.dq dr8, r8 | |
2513 | beq/l r32, r63, tr1 | |
2514 | fmov.dq dr10, r8 | |
2515 | blink tr1, r63 | |
2516 | LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ | |
2517 | shlri r1, 8 - 3, r34 | |
2518 | andi r34, 3 << 3, r33 | |
2519 | addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 | |
2520 | LOCAL(ct_r8_fp_base): | |
2521 | ptrel/l r32, tr2 | |
2522 | movi 7 << 8, r31 | |
2523 | andc r1, r31, r1 | |
2524 | blink tr2, r63 | |
2525 | LOCAL(ct_r8_fp_copy): | |
2526 | fmov.dq dr0, r8 | |
2527 | blink tr1, r63 | |
2528 | fmov.dq dr2, r8 | |
2529 | blink tr1, r63 | |
2530 | fmov.dq dr4, r8 | |
2531 | blink tr1, r63 | |
2532 | fmov.dq dr6, r8 | |
2533 | blink tr1, r63 | |
2534 | LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ | |
2535 | /* It is either dr8 or dr10. */ | |
2536 | movi 15 << 4, r31 | |
2537 | andi r1, 1 << 4, r32 | |
2538 | andc r1, r31, r1 | |
2539 | fmov.dq dr8, r9 | |
2540 | beq/l r32, r63, tr1 | |
2541 | fmov.dq dr10, r9 | |
2542 | blink tr1, r63 | |
2543 | LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */ | |
2544 | shlri r1, 4 - 3, r34 | |
2545 | andi r34, 3 << 3, r33 | |
2546 | addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 | |
2547 | LOCAL(ct_r9_fp_base): | |
2548 | ptrel/l r32, tr2 | |
2549 | movi 7 << 4, r31 | |
2550 | andc r1, r31, r1 | |
2551 | blink tr2, r63 | |
2552 | LOCAL(ct_r9_fp_copy): | |
2553 | fmov.dq dr0, r9 | |
2554 | blink tr1, r63 | |
2555 | fmov.dq dr2, r9 | |
2556 | blink tr1, r63 | |
2557 | fmov.dq dr4, r9 | |
2558 | blink tr1, r63 | |
2559 | fmov.dq dr6, r9 | |
2560 | blink tr1, r63 | |
2561 | LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ |
2562 | pt/l LOCAL(ct_r2_load), tr2 |
2563 | movi 3, r30 |
2564 | shlli r30, 29, r31 /* r31 = two-bit field mask at bit 29 */ |
2565 | and r1, r31, r32 /* r32 = this field of the cookie */ |
2566 | andc r1, r31, r1 /* clear the field in the cookie */ |
2567 | beq/l r31, r32, tr2 /* both bits set: single load at ct_r2_load, then back to ct_loop */ |
2568 | addi.l r2, 8, r3 /* otherwise the next register gets the following address ... */ |
2569 | ldx.q r2, r63, r2 /* ... r2 is loaded from its own address, and we continue below */ |
2570 | /* Fall through. */ |
2571 | LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ |
2572 | pt/l LOCAL(ct_r3_load), tr2 |
2573 | movi 3, r30 |
2574 | shlli r30, 26, r31 /* r31 = two-bit field mask at bit 26 */ |
2575 | and r1, r31, r32 |
2576 | andc r1, r31, r1 |
2577 | beq/l r31, r32, tr2 /* both bits set: single load at ct_r3_load */ |
2578 | addi.l r3, 8, r4 |
2579 | ldx.q r3, r63, r3 /* falls through into ct_r4_ld */ |
2580 | LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ |
2581 | pt/l LOCAL(ct_r4_load), tr2 |
2582 | movi 3, r30 |
2583 | shlli r30, 23, r31 /* r31 = two-bit field mask at bit 23 */ |
2584 | and r1, r31, r32 |
2585 | andc r1, r31, r1 |
2586 | beq/l r31, r32, tr2 /* both bits set: single load at ct_r4_load */ |
2587 | addi.l r4, 8, r5 |
2588 | ldx.q r4, r63, r4 /* falls through into ct_r5_ld */ |
2589 | LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ |
2590 | pt/l LOCAL(ct_r5_load), tr2 |
2591 | movi 3, r30 |
2592 | shlli r30, 20, r31 /* r31 = two-bit field mask at bit 20 */ |
2593 | and r1, r31, r32 |
2594 | andc r1, r31, r1 |
2595 | beq/l r31, r32, tr2 /* both bits set: single load at ct_r5_load */ |
2596 | addi.l r5, 8, r6 |
2597 | ldx.q r5, r63, r5 /* falls through into ct_r6_ld */ |
2598 | LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ |
2599 | pt/l LOCAL(ct_r6_load), tr2 |
2600 | movi 3 << 16, r31 /* r31 = two-bit field mask at bit 16 */ |
2601 | and r1, r31, r32 |
2602 | andc r1, r31, r1 |
2603 | beq/l r31, r32, tr2 /* both bits set: single load at ct_r6_load */ |
2604 | addi.l r6, 8, r7 |
2605 | ldx.q r6, r63, r6 /* falls through into ct_r7_ld */ |
2606 | LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ |
2607 | pt/l LOCAL(ct_r7_load), tr2 |
2608 | movi 3 << 12, r31 /* r31 = two-bit field mask at bit 12 */ |
2609 | and r1, r31, r32 |
2610 | andc r1, r31, r1 |
2611 | beq/l r31, r32, tr2 /* both bits set: single load at ct_r7_load */ |
2612 | addi.l r7, 8, r8 |
2613 | ldx.q r7, r63, r7 /* falls through into ct_r8_ld */ |
2614 | LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ |
2615 | pt/l LOCAL(ct_r8_load), tr2 |
2616 | movi 3 << 8, r31 /* r31 = two-bit field mask at bit 8 */ |
2617 | and r1, r31, r32 |
2618 | andc r1, r31, r1 |
2619 | beq/l r31, r32, tr2 /* both bits set: single load at ct_r8_load */ |
2620 | addi.l r8, 8, r9 |
2621 | ldx.q r8, r63, r8 /* falls through into ct_r9_ld */ |
2622 | LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ |
2623 | pt/l LOCAL(ct_check_tramp), tr2 |
2624 | ldx.q r9, r63, r9 |
2625 | blink tr2, r63 /* last register of the chain: go straight to ct_check_tramp instead of ct_loop */ |
2626 | LOCAL(ct_r2_load): | |
2627 | ldx.q r2, r63, r2 | |
2628 | blink tr1, r63 | |
2629 | LOCAL(ct_r3_load): | |
2630 | ldx.q r3, r63, r3 | |
2631 | blink tr1, r63 | |
2632 | LOCAL(ct_r4_load): | |
2633 | ldx.q r4, r63, r4 | |
2634 | blink tr1, r63 | |
2635 | LOCAL(ct_r5_load): | |
2636 | ldx.q r5, r63, r5 | |
2637 | blink tr1, r63 | |
2638 | LOCAL(ct_r6_load): | |
2639 | ldx.q r6, r63, r6 | |
2640 | blink tr1, r63 | |
2641 | LOCAL(ct_r7_load): | |
2642 | ldx.q r7, r63, r7 | |
2643 | blink tr1, r63 | |
2644 | LOCAL(ct_r8_load): | |
2645 | ldx.q r8, r63, r8 | |
2646 | blink tr1, r63 | |
2647 | LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ | |
2648 | movi 1, r30 | |
2649 | ldx.q r15, r63, r2 | |
2650 | shlli r30, 29, r31 | |
2651 | addi.l r15, 8, r15 | |
2652 | andc r1, r31, r1 | |
2653 | blink tr1, r63 | |
2654 | LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ | |
2655 | movi 1, r30 | |
2656 | ldx.q r15, r63, r3 | |
2657 | shlli r30, 26, r31 | |
2658 | addi.l r15, 8, r15 | |
2659 | andc r1, r31, r1 | |
2660 | blink tr1, r63 | |
2661 | LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ | |
2662 | movi 1, r30 | |
2663 | ldx.q r15, r63, r4 | |
2664 | shlli r30, 23, r31 | |
2665 | addi.l r15, 8, r15 | |
2666 | andc r1, r31, r1 | |
2667 | blink tr1, r63 | |
2668 | LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ | |
2669 | movi 1, r30 | |
2670 | ldx.q r15, r63, r5 | |
2671 | shlli r30, 20, r31 | |
2672 | addi.l r15, 8, r15 | |
2673 | andc r1, r31, r1 | |
2674 | blink tr1, r63 | |
2675 | LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ | |
2676 | movi 1, r30 | |
2677 | ldx.q r15, r63, r6 | |
2678 | shlli r30, 16, r31 | |
2679 | addi.l r15, 8, r15 | |
2680 | andc r1, r31, r1 | |
2681 | blink tr1, r63 | |
2682 | LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ | |
2683 | ldx.q r15, r63, r7 | |
2684 | movi 1 << 12, r31 | |
2685 | addi.l r15, 8, r15 | |
2686 | andc r1, r31, r1 | |
2687 | blink tr1, r63 | |
2688 | LOCAL(ct_r8_pop): /* Pop r8 from the stack. */ | |
2689 | ldx.q r15, r63, r8 | |
2690 | movi 1 << 8, r31 | |
2691 | addi.l r15, 8, r15 | |
2692 | andc r1, r31, r1 | |
2693 | blink tr1, r63 | |
2694 | LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ |
2695 | andi r1, 7 << 1, r30 /* r30 = 2 * count, taken from cookie bits 1-3 */ |
2696 | movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 |
2697 | shlli r30, 2, r31 /* r31 = 8 * count: each pop below is two 4-byte instructions */ |
2698 | shori LOCAL(ct_end_of_pop_seq) & 65535, r32 /* r32 = address of ct_end_of_pop_seq */ |
2699 | sub.l r32, r31, r33 /* back up count pops from the end of the sequence */ |
2700 | ptabs/l r33, tr2 |
2701 | blink tr2, r63 /* enter the sequence so that exactly the last count pops run */ |
2702 | LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ |
2703 | ldx.q r15, r63, r3 |
2704 | addi.l r15, 8, r15 |
2705 | ldx.q r15, r63, r4 |
2706 | addi.l r15, 8, r15 |
2707 | ldx.q r15, r63, r5 |
2708 | addi.l r15, 8, r15 |
2709 | ldx.q r15, r63, r6 |
2710 | addi.l r15, 8, r15 |
2711 | ldx.q r15, r63, r7 |
2712 | addi.l r15, 8, r15 |
2713 | ldx.q r15, r63, r8 |
2714 | addi.l r15, 8, r15 |
2715 | LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ |
2716 | ldx.q r15, r63, r9 |
2717 | addi.l r15, 8, r15 |
2718 | LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ |
2719 | LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ |
2720 | pt/u LOCAL(ct_ret_wide), tr2 |
2721 | andi r1, 1, r1 /* keep only bit 0 of the cookie */ |
2722 | bne/u r1, r63, tr2 /* bit 0 set: take the wide-return path */ |
2723 | LOCAL(ct_call_func): /* Just branch to the function. */ |
2724 | blink tr0, r63 /* no link: the function returns directly to our caller */ |
2725 | LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its |
2726 | 64-bit return value. */ |
2727 | add.l r18, r63, r10 /* save the caller's return address in r10 */ |
2728 | blink tr0, r18 /* call the function, linking through r18 */ |
2729 | ptabs r10, tr0 /* tr0 = saved return address */ |
2730 | #if __LITTLE_ENDIAN__ |
2731 | shari r2, 32, r3 /* r3 = upper 32 bits of the result */ |
2732 | add.l r2, r63, r2 /* r2 = lower 32 bits of the result */ |
2733 | #else |
2734 | add.l r2, r63, r3 /* r3 = lower 32 bits of the result */ |
2735 | shari r2, 32, r2 /* r2 = upper 32 bits of the result */ |
2736 | #endif |
2737 | blink tr0, r63 /* return to the original caller */ |
805e22b2 | 2738 |
2739 | ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) |
87e19636 | 2740 | #endif /* L_shcompact_call_trampoline */ |
2741 | ||
2742 | #ifdef L_shcompact_return_trampoline | |
2743 | /* This function does the converse of the code in `ret_wide' | |
2744 | above. It is tail-called by SHcompact functions returning | |
2745 | 64-bit non-floating-point values, to pack the 32-bit values in | |
2746 | r2 and r3 into r2. */ | |
2747 | ||
2748 | .mode SHmedia |
2749 | .section .text..SHmedia32, "ax" |
2750 | .align 2 |
2751 | .global GLOBAL(GCC_shcompact_return_trampoline) |
59312820 | 2752 | HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) |
87e19636 | 2753 | GLOBAL(GCC_shcompact_return_trampoline): /* Packs the 32-bit values in r2 and r3 into one 64-bit value in r2 and returns through r18. */ |
2754 | ptabs/l r18, tr0 /* tr0 = return address */ |
2755 | #if __LITTLE_ENDIAN__ |
2756 | addz.l r2, r63, r2 /* zero-extend r2: it becomes the low half */ |
2757 | shlli r3, 32, r3 /* move r3 into the high half */ |
2758 | #else |
2759 | addz.l r3, r63, r3 /* zero-extend r3: it becomes the low half */ |
2760 | shlli r2, 32, r2 /* move r2 into the high half */ |
2761 | #endif |
2762 | or r3, r2, r2 /* combine both halves in r2 */ |
2763 | blink tr0, r63 /* return */ |
805e22b2 | 2764 |
2765 | ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) |
87e19636 | 2766 | #endif /* L_shcompact_return_trampoline */ |
2767 | ||
2768 | #ifdef L_shcompact_incoming_args | |
2769 | .section .rodata | |
2770 | .align 1 | |
2771 | LOCAL(ia_main_table): /* Dispatch table for GCC_shcompact_incoming_args: 16-bit offsets from ia_main_label, indexed by the nsb of the cookie. The code biases the table base by -31 * 2, so the first entry below is index 31. */ | |
2772 | .word 1 /* Invalid, just loop */ | |
2773 | .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) | |
2774 | .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) | |
2775 | .word 1 /* Invalid, just loop */ | |
2776 | .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) | |
2777 | .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) | |
2778 | .word 1 /* Invalid, just loop */ | |
2779 | .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) | |
2780 | .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) | |
2781 | .word 1 /* Invalid, just loop */ | |
2782 | .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) | |
2783 | .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) | |
2784 | .word 1 /* Invalid, just loop */ | |
2785 | .word 1 /* Invalid, just loop */ | |
2786 | .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) | |
2787 | .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) | |
2788 | .word 1 /* Invalid, just loop */ | |
2789 | .word 1 /* Invalid, just loop */ | |
2790 | .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) | |
2791 | .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) | |
2792 | .word 1 /* Invalid, just loop */ | |
2793 | .word 1 /* Invalid, just loop */ | |
2794 | .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) | |
2795 | .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) | |
2796 | .word 1 /* Invalid, just loop */ | |
2797 | .word 1 /* Invalid, just loop */ | |
2798 | .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) | |
2799 | .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) | |
2800 | .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) | |
2801 | .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) | |
2802 | .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) | |
2803 | .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) | |
2804 | .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* Last entry (33rd): index 63, the largest value nsb can produce. */ | |
2805 | .mode SHmedia | |
2806 | .section .text..SHmedia32, "ax" | |
2807 | .align 2 | |
2808 | ||
2809 | /* This function stores 64-bit general-purpose registers back in | |
101d4704 | 2810 | the stack, and loads the address in which each register |
2811 | was stored into itself. The lower 32 bits of r17 hold the address | |
2812 | to begin storing, and the upper 32 bits of r17 hold the cookie. | |
2813 | Its execution time is linear in the | |
87e19636 | 2814 | number of registers that actually have to be copied, and it is |
2815 | optimized for structures larger than 64 bits, as opposed to | |
0924aa1d | 2816 | individual `long long' arguments. See sh.h for details on the |
87e19636 | 2817 | actual bit pattern. The code uses r0, r36-r41, r43 and tr0-tr2 as scratch and returns through the address in r18. */ |
2818 | ||
2819 | .global GLOBAL(GCC_shcompact_incoming_args) | |
59312820 | 2820 | FUNC(GLOBAL(GCC_shcompact_incoming_args)) |
87e19636 | 2821 | GLOBAL(GCC_shcompact_incoming_args): |
2822 | ptabs/l r18, tr0 /* Prepare to return. */ | |
2823 | shlri r17, 32, r0 /* Load the cookie. */ | |
101d4704 | 2824 | movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 /* r43 = upper 16 bits of the biased table base; the shori below appends the lower 16. */ |
87e19636 | 2825 | pt/l LOCAL(ia_loop), tr1 /* tr1 = ia_loop, the re-dispatch target used by every handler. */ |
2826 | add.l r17, r63, r17 /* Keep only the 32-bit store address in r17 (sign-extended). */ | |
101d4704 | 2827 | shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* r43 = ia_main_table - 62, so that nsb value 31 selects the first entry. */ |
87e19636 | 2828 | LOCAL(ia_loop): /* Dispatch on the nsb of the cookie bits still set in r0. */ |
101d4704 | 2829 | nsb r0, r36 |
2830 | shlli r36, 1, r37 /* Scale the nsb value to a 16-bit table index. */ | |
2831 | ldx.w r43, r37, r38 /* r38 = handler offset read from the table. */ | |
87e19636 | 2832 | LOCAL(ia_main_label): /* The table offsets are expressed relative to this label. */ |
101d4704 | 2833 | ptrel/l r38, tr2 |
87e19636 | 2834 | blink tr2, r63 |
2835 | LOCAL(ia_r2_ld): /* Store r2 and load its address. */ | |
101d4704 | 2836 | movi 3, r38 |
2837 | shlli r38, 29, r39 /* r39 = 3 << 29: the two cookie bits examined for r2. */ | |
2838 | and r0, r39, r40 /* r40 = those bits as currently set in the cookie. */ | |
2839 | andc r0, r39, r0 /* Clear both bits in the cookie. */ | |
87e19636 | 2840 | stx.q r17, r63, r2 /* Store the full 64-bit r2 at the current address. */ |
2841 | add.l r17, r63, r2 /* r2 = address it was stored at. */ | |
2842 | addi.l r17, 8, r17 /* Advance to the next 8-byte slot. */ | |
101d4704 | 2843 | beq/u r39, r40, tr1 /* Both bits were set: re-dispatch through ia_loop; otherwise fall through into ia_r3_ld. */ |
87e19636 | 2844 | LOCAL(ia_r3_ld): /* Store r3 and load its address. */ |
101d4704 | 2845 | movi 3, r38 |
2846 | shlli r38, 26, r39 /* Same sequence as ia_r2_ld, with mask 3 << 26. */ | |
2847 | and r0, r39, r40 | |
2848 | andc r0, r39, r0 | |
87e19636 | 2849 | stx.q r17, r63, r3 |
2850 | add.l r17, r63, r3 | |
2851 | addi.l r17, 8, r17 | |
101d4704 | 2852 | beq/u r39, r40, tr1 /* Re-dispatch if both bits were set, else fall through into ia_r4_ld. */ |
87e19636 | 2853 | LOCAL(ia_r4_ld): /* Store r4 and load its address. */ |
101d4704 | 2854 | movi 3, r38 |
2855 | shlli r38, 23, r39 /* Mask 3 << 23. */ | |
2856 | and r0, r39, r40 | |
2857 | andc r0, r39, r0 | |
87e19636 | 2858 | stx.q r17, r63, r4 |
2859 | add.l r17, r63, r4 | |
2860 | addi.l r17, 8, r17 | |
101d4704 | 2861 | beq/u r39, r40, tr1 /* Re-dispatch if both bits were set, else fall through into ia_r5_ld. */ |
87e19636 | 2862 | LOCAL(ia_r5_ld): /* Store r5 and load its address. */ |
101d4704 | 2863 | movi 3, r38 |
2864 | shlli r38, 20, r39 /* Mask 3 << 20. */ | |
2865 | and r0, r39, r40 | |
2866 | andc r0, r39, r0 | |
87e19636 | 2867 | stx.q r17, r63, r5 |
2868 | add.l r17, r63, r5 | |
2869 | addi.l r17, 8, r17 | |
101d4704 | 2870 | beq/u r39, r40, tr1 /* Re-dispatch if both bits were set, else fall through into ia_r6_ld. */ |
87e19636 | 2871 | LOCAL(ia_r6_ld): /* Store r6 and load its address. */ |
101d4704 | 2872 | movi 3, r38 |
2873 | shlli r38, 16, r39 /* Mask 3 << 16. */ | |
2874 | and r0, r39, r40 | |
2875 | andc r0, r39, r0 | |
87e19636 | 2876 | stx.q r17, r63, r6 |
2877 | add.l r17, r63, r6 | |
2878 | addi.l r17, 8, r17 | |
101d4704 | 2879 | beq/u r39, r40, tr1 /* Re-dispatch if both bits were set, else fall through into ia_r7_ld. */ |
87e19636 | 2880 | LOCAL(ia_r7_ld): /* Store r7 and load its address. */ |
101d4704 | 2881 | movi 3 << 12, r39 /* This mask is loaded directly by movi, with no separate shift. */ |
2882 | and r0, r39, r40 | |
2883 | andc r0, r39, r0 | |
87e19636 | 2884 | stx.q r17, r63, r7 |
2885 | add.l r17, r63, r7 | |
2886 | addi.l r17, 8, r17 | |
101d4704 | 2887 | beq/u r39, r40, tr1 /* Re-dispatch if both bits were set, else fall through into ia_r8_ld. */ |
87e19636 | 2888 | LOCAL(ia_r8_ld): /* Store r8 and load its address. */ |
101d4704 | 2889 | movi 3 << 8, r39 |
2890 | and r0, r39, r40 | |
2891 | andc r0, r39, r0 | |
87e19636 | 2892 | stx.q r17, r63, r8 |
2893 | add.l r17, r63, r8 | |
2894 | addi.l r17, 8, r17 | |
101d4704 | 2895 | beq/u r39, r40, tr1 /* Re-dispatch if both bits were set, else fall through into ia_r9_ld. */ |
87e19636 | 2896 | LOCAL(ia_r9_ld): /* Store r9 and load its address. */ |
2897 | stx.q r17, r63, r9 | |
2898 | add.l r17, r63, r9 | |
2899 | blink tr0, r63 /* r9 is the last register handled: return to the caller without re-dispatching. */ | |
2900 | LOCAL(ia_r2_push): /* Push r2 onto the stack. */ | |
101d4704 | 2901 | movi 1, r38 |
2902 | shlli r38, 29, r39 /* r39 = 1 << 29: the single cookie bit that selected this handler. */ | |
2903 | andc r0, r39, r0 /* Clear it so the next dispatch sees the following field. */ | |
87e19636 | 2904 | stx.q r17, r63, r2 /* Store r2; unlike ia_r2_ld, r2 itself is left unchanged. */ |
2905 | addi.l r17, 8, r17 | |
2906 | blink tr1, r63 /* Back to ia_loop. */ | |
2907 | LOCAL(ia_r3_push): /* Push r3 onto the stack. */ | |
101d4704 | 2908 | movi 1, r38 |
2909 | shlli r38, 26, r39 /* Bit 1 << 26. */ | |
2910 | andc r0, r39, r0 | |
87e19636 | 2911 | stx.q r17, r63, r3 |
2912 | addi.l r17, 8, r17 | |
2913 | blink tr1, r63 | |
2914 | LOCAL(ia_r4_push): /* Push r4 onto the stack. */ | |
101d4704 | 2915 | movi 1, r38 |
2916 | shlli r38, 23, r39 /* Bit 1 << 23. */ | |
2917 | andc r0, r39, r0 | |
87e19636 | 2918 | stx.q r17, r63, r4 |
2919 | addi.l r17, 8, r17 | |
2920 | blink tr1, r63 | |
2921 | LOCAL(ia_r5_push): /* Push r5 onto the stack. */ | |
101d4704 | 2922 | movi 1, r38 |
2923 | shlli r38, 20, r39 /* Bit 1 << 20. */ | |
2924 | andc r0, r39, r0 | |
87e19636 | 2925 | stx.q r17, r63, r5 |
2926 | addi.l r17, 8, r17 | |
2927 | blink tr1, r63 | |
2928 | LOCAL(ia_r6_push): /* Push r6 onto the stack. */ | |
101d4704 | 2929 | movi 1, r38 |
2930 | shlli r38, 16, r39 /* Bit 1 << 16. */ | |
2931 | andc r0, r39, r0 | |
87e19636 | 2932 | stx.q r17, r63, r6 |
2933 | addi.l r17, 8, r17 | |
2934 | blink tr1, r63 | |
2935 | LOCAL(ia_r7_push): /* Push r7 onto the stack. */ | |
101d4704 | 2936 | movi 1 << 12, r39 |
2937 | andc r0, r39, r0 | |
87e19636 | 2938 | stx.q r17, r63, r7 |
2939 | addi.l r17, 8, r17 | |
2940 | blink tr1, r63 | |
2941 | LOCAL(ia_r8_push): /* Push r8 onto the stack. */ | |
101d4704 | 2942 | movi 1 << 8, r39 |
2943 | andc r0, r39, r0 | |
87e19636 | 2944 | stx.q r17, r63, r8 |
2945 | addi.l r17, 8, r17 | |
2946 | blink tr1, r63 | |
2947 | LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ | |
101d4704 | 2948 | andi r0, 7 << 1, r38 /* r38 = cookie bits 3..1, i.e. twice the 3-bit count field. */ |
2949 | movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 /* r40 = address of ia_end_of_push_seq, built 16 bits at a time. */ | |
2950 | shlli r38, 2, r39 /* r39 = 8 * count: every store/advance pair below occupies 8 bytes of code. */ | |
2951 | shori LOCAL(ia_end_of_push_seq) & 65535, r40 | |
2952 | sub.l r40, r39, r41 /* Entry point = end label - 8 * count, so a larger count starts with an earlier register. */ | |
2953 | ptabs/l r41, tr2 /* NOTE(review): a count of zero would target ia_end_of_push_seq itself, past the final blink -- presumably never encoded; confirm against sh.h. */ | |
87e19636 | 2954 | blink tr2, r63 |
2955 | LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ | |
2956 | stx.q r17, r63, r3 /* Reached with count 7. */ | |
2957 | addi.l r17, 8, r17 | |
2958 | stx.q r17, r63, r4 | |
2959 | addi.l r17, 8, r17 | |
2960 | stx.q r17, r63, r5 | |
2961 | addi.l r17, 8, r17 | |
2962 | stx.q r17, r63, r6 | |
2963 | addi.l r17, 8, r17 | |
2964 | stx.q r17, r63, r7 | |
2965 | addi.l r17, 8, r17 | |
2966 | stx.q r17, r63, r8 | |
2967 | addi.l r17, 8, r17 | |
2968 | LOCAL(ia_r9_push): /* Push r9 onto the stack. */ | |
2969 | stx.q r17, r63, r9 /* Also the count 1 entry of the push sequence; no address advance is needed after the last store. */ | |
2970 | LOCAL(ia_return): /* Return. */ | |
2971 | blink tr0, r63 | |
2972 | LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ | |
805e22b2 | 2973 | ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) |
87e19636 | 2974 | #endif /* L_shcompact_incoming_args */ |
2975 | #endif | |
2976 | #if __SH5__ | |
2977 | #ifdef L_nested_trampoline | |
2978 | #if __SH5__ == 32 | |
2979 | .section .text..SHmedia32,"ax" | |
2980 | #else | |
2981 | .text | |
2982 | #endif | |
2983 | .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ | |
2984 | .global GLOBAL(GCC_nested_trampoline) | |
59312820 | 2985 | HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) |
87e19636 | 2986 | GLOBAL(GCC_nested_trampoline): /* Template code: finds its own address, then loads a branch target and a value for r1 from the words at fixed offsets from it. */ |
2987 | .mode SHmedia | |
2988 | ptrel/u r63, tr0 /* Zero offset (r63): tr0 = an address derived from the current PC. */ | |
2989 | gettr tr0, r0 /* r0 = that address, used as the base for the loads below. */ | |
2990 | #if __SH5__ == 64 | |
2991 | ld.q r0, 24, r1 /* 64-bit ABI: branch target is the quadword at offset 24. */ | |
2992 | #else | |
2993 | ld.l r0, 24, r1 /* 32-bit ABI: branch target is the longword at offset 24. */ | |
2994 | #endif | |
2995 | ptabs/l r1, tr1 /* tr1 = branch target. */ | |
2996 | #if __SH5__ == 64 | |
2997 | ld.q r0, 32, r1 /* r1 = the following quadword -- presumably the static chain; confirm against the trampoline setup code. */ | |
2998 | #else | |
2999 | ld.l r0, 28, r1 /* r1 = the following longword -- presumably the static chain; confirm against the trampoline setup code. */ | |
3000 | #endif | |
3001 | blink tr1, r63 /* Jump to the target without linking. */ | |
805e22b2 | 3002 | |
3003 | ENDFUNC(GLOBAL(GCC_nested_trampoline)) | |
87e19636 | 3004 | #endif /* L_nested_trampoline */ |
3005 | #endif /* __SH5__ */ | |
3006 | #if __SH5__ == 32 | |
3007 | #ifdef L_push_pop_shmedia_regs | |
3008 | .section .text..SHmedia32,"ax" | |
3009 | .mode SHmedia | |
3010 | .align 2 | |
3011 | #ifndef __SH4_NOFPU__ | |
3012 | .global GLOBAL(GCC_push_shmedia_regs) | |
805e22b2 | 3013 | FUNC(GLOBAL(GCC_push_shmedia_regs)) |
87e19636 | 3014 | GLOBAL(GCC_push_shmedia_regs): /* FPU variant: pushes dr36..dr62 (14 slots), then falls into the integer part below for 27 more slots (41 * 8 bytes in total). */ |
3015 | addi.l r15, -14*8, r15 /* Reserve 14 eight-byte slots on the stack. */ | |
3016 | fst.d r15, 13*8, dr62 | |
3017 | fst.d r15, 12*8, dr60 | |
3018 | fst.d r15, 11*8, dr58 | |
3019 | fst.d r15, 10*8, dr56 | |
3020 | fst.d r15, 9*8, dr54 | |
3021 | fst.d r15, 8*8, dr52 | |
3022 | fst.d r15, 7*8, dr50 | |
3023 | fst.d r15, 6*8, dr48 | |
3024 | fst.d r15, 5*8, dr46 | |
3025 | fst.d r15, 4*8, dr44 | |
3026 | fst.d r15, 3*8, dr42 | |
3027 | fst.d r15, 2*8, dr40 | |
3028 | fst.d r15, 1*8, dr38 | |
3029 | fst.d r15, 0*8, dr36 | |
59312820 | 3030 | #else /* ! __SH4_NOFPU__ */ |
87e19636 | 3031 | .global GLOBAL(GCC_push_shmedia_regs_nofpu) |
805e22b2 | 3032 | FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) |
87e19636 | 3033 | GLOBAL(GCC_push_shmedia_regs_nofpu): /* No-FPU variant: only the integer part below is executed. */ |
59312820 | 3034 | #endif /* ! __SH4_NOFPU__ */ |
87e19636 | 3035 | ptabs/l r18, tr0 /* tr0 = return address held in r18. */ |
3036 | addi.l r15, -27*8, r15 /* Reserve 27 eight-byte slots. */ | |
3037 | gettr tr7, r62 /* tr5..tr7 are copied into r60..r62 before those registers are stored, so slots 24..26 receive the target registers and the previous r60..r62 contents are overwritten. */ | |
3038 | gettr tr6, r61 | |
3039 | gettr tr5, r60 | |
3040 | st.q r15, 26*8, r62 /* Slot 26: tr7. */ | |
3041 | st.q r15, 25*8, r61 /* Slot 25: tr6. */ | |
3042 | st.q r15, 24*8, r60 /* Slot 24: tr5. */ | |
3043 | st.q r15, 23*8, r59 /* Slots 23..8: r59 down to r44. */ | |
3044 | st.q r15, 22*8, r58 | |
3045 | st.q r15, 21*8, r57 | |
3046 | st.q r15, 20*8, r56 | |
3047 | st.q r15, 19*8, r55 | |
3048 | st.q r15, 18*8, r54 | |
3049 | st.q r15, 17*8, r53 | |
3050 | st.q r15, 16*8, r52 | |
3051 | st.q r15, 15*8, r51 | |
3052 | st.q r15, 14*8, r50 | |
3053 | st.q r15, 13*8, r49 | |
3054 | st.q r15, 12*8, r48 | |
3055 | st.q r15, 11*8, r47 | |
3056 | st.q r15, 10*8, r46 | |
3057 | st.q r15, 9*8, r45 | |
3058 | st.q r15, 8*8, r44 | |
3059 | st.q r15, 7*8, r35 /* Slots 7..0: r35 down to r28. */ | |
3060 | st.q r15, 6*8, r34 | |
3061 | st.q r15, 5*8, r33 | |
3062 | st.q r15, 4*8, r32 | |
3063 | st.q r15, 3*8, r31 | |
3064 | st.q r15, 2*8, r30 | |
3065 | st.q r15, 1*8, r29 | |
3066 | st.q r15, 0*8, r28 | |
3067 | blink tr0, r63 /* Return with r15 still lowered; the matching pop routine releases the frame. */ | |
805e22b2 | 3068 | #ifndef __SH4_NOFPU__ |
3069 | ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) | |
59312820 | 3070 | #else |
805e22b2 | 3071 | ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) |
59312820 | 3072 | #endif |
3073 | #ifndef __SH4_NOFPU__ | |
87e19636 | 3074 | .global GLOBAL(GCC_pop_shmedia_regs) |
805e22b2 | 3075 | FUNC(GLOBAL(GCC_pop_shmedia_regs)) |
87e19636 | 3076 | GLOBAL(GCC_pop_shmedia_regs): /* FPU variant: reloads dr36..dr62 from slots 27..40, then joins the shared integer restore at .L0. */ |
3077 | pt .L0, tr1 /* tr1 = shared integer-restore code. */ | |
3078 | movi 41*8, r0 /* r0 = bytes to release: 27 integer slots plus 14 floating-point slots. */ | |
3079 | fld.d r15, 40*8, dr62 | |
3080 | fld.d r15, 39*8, dr60 | |
3081 | fld.d r15, 38*8, dr58 | |
3082 | fld.d r15, 37*8, dr56 | |
3083 | fld.d r15, 36*8, dr54 | |
3084 | fld.d r15, 35*8, dr52 | |
3085 | fld.d r15, 34*8, dr50 | |
3086 | fld.d r15, 33*8, dr48 | |
3087 | fld.d r15, 32*8, dr46 | |
3088 | fld.d r15, 31*8, dr44 | |
3089 | fld.d r15, 30*8, dr42 | |
3090 | fld.d r15, 29*8, dr40 | |
3091 | fld.d r15, 28*8, dr38 | |
3092 | fld.d r15, 27*8, dr36 | |
3093 | blink tr1, r63 /* Jump to .L0, skipping the movi below so that r0 stays 41*8. */ | |
59312820 | 3094 | #else /* ! __SH4_NOFPU__ */ |
87e19636 | 3095 | .global GLOBAL(GCC_pop_shmedia_regs_nofpu) |
805e22b2 | 3096 | FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) |
87e19636 | 3097 | GLOBAL(GCC_pop_shmedia_regs_nofpu): /* No-FPU variant: only the 27 integer slots exist. */ |
59312820 | 3098 | #endif /* ! __SH4_NOFPU__ */ |
87e19636 | 3099 | movi 27*8, r0 /* r0 = bytes to release for the no-FPU entry; the FPU entry branches past this instruction. */ |
3100 | .L0: | |
3101 | ptabs r18, tr0 /* tr0 = return address held in r18. */ | |
3102 | ld.q r15, 26*8, r62 /* Slots 26..24 hold the saved tr7..tr5 values. */ | |
3103 | ld.q r15, 25*8, r61 | |
3104 | ld.q r15, 24*8, r60 | |
3105 | ptabs r62, tr7 /* Restore tr7..tr5; r60..r62 keep these values on return. */ | |
3106 | ptabs r61, tr6 | |
3107 | ptabs r60, tr5 | |
3108 | ld.q r15, 23*8, r59 /* Slots 23..8: r59 down to r44. */ | |
3109 | ld.q r15, 22*8, r58 | |
3110 | ld.q r15, 21*8, r57 | |
3111 | ld.q r15, 20*8, r56 | |
3112 | ld.q r15, 19*8, r55 | |
3113 | ld.q r15, 18*8, r54 | |
3114 | ld.q r15, 17*8, r53 | |
3115 | ld.q r15, 16*8, r52 | |
3116 | ld.q r15, 15*8, r51 | |
3117 | ld.q r15, 14*8, r50 | |
3118 | ld.q r15, 13*8, r49 | |
3119 | ld.q r15, 12*8, r48 | |
3120 | ld.q r15, 11*8, r47 | |
3121 | ld.q r15, 10*8, r46 | |
3122 | ld.q r15, 9*8, r45 | |
3123 | ld.q r15, 8*8, r44 | |
3124 | ld.q r15, 7*8, r35 /* Slots 7..0: r35 down to r28. */ | |
3125 | ld.q r15, 6*8, r34 | |
3126 | ld.q r15, 5*8, r33 | |
3127 | ld.q r15, 4*8, r32 | |
3128 | ld.q r15, 3*8, r31 | |
3129 | ld.q r15, 2*8, r30 | |
3130 | ld.q r15, 1*8, r29 | |
3131 | ld.q r15, 0*8, r28 | |
3132 | add.l r15, r0, r15 /* Release the frame: 27*8 or 41*8 bytes depending on the entry point. */ | |
3133 | blink tr0, r63 | |
805e22b2 | 3134 | |
3135 | #ifndef __SH4_NOFPU__ | |
3136 | ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) | |
59312820 | 3137 | #else |
805e22b2 | 3138 | ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) |
59312820 | 3139 | #endif |
87e19636 | 3140 | #endif /* L_push_pop_shmedia_regs */ |
3141 | #endif /* __SH5__ == 32 */ | |
59312820 | 3142 | |
59312820 | 3143 | #ifdef L_div_table |
d6005df3 | 3144 | #if __SH5__ |
273fffd6 | 3145 | #if defined(__pic__) && __SHMEDIA__ |
59312820 | 3146 | .global GLOBAL(sdivsi3) |
3147 | FUNC(GLOBAL(sdivsi3)) | |
3148 | #if __SH5__ == 32 | |
3149 | .section .text..SHmedia32,"ax" | |
3150 | #else | |
3151 | .text | |
3152 | #endif | |
3153 | #if 0 | |
3154 | /* ??? FIXME: Presumably due to a linker bug, exporting data symbols | |
3155 | in a text section does not work (at least for shared libraries): | |
3156 | the linker sets the LSB of the address as if this was SHmedia code. */ | |
3157 | #define TEXT_DATA_BUG | |
3158 | #endif | |
3159 | .align 2 | |
3160 | // inputs: r4,r5 | |
3161 | // clobbered: r1,r18,r19,r20,r21,r25,tr0 | |
3162 | // result in r0 | |
3163 | .global GLOBAL(sdivsi3) | |
3164 | GLOBAL(sdivsi3): /* Divides r4 by r5 by multiplying with a reciprocal of r5: a table lookup gives a first estimate, which is refined before the final multiply and shift (fixed-point formats are noted per line). */ | |
3165 | #ifdef TEXT_DATA_BUG | |
3166 | ptb datalabel Local_div_table,tr0 | |
3167 | #else | |
3168 | ptb GLOBAL(div_table_internal),tr0 | |
3169 | #endif | |
3170 | nsb r5, r1 | |
3171 | shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 | |
3172 | shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) | |
3173 | /* bubble */ | |
3174 | gettr tr0,r20 /* r20 = table base address prepared by the ptb above. */ | |
3175 | ldx.ub r20, r21, r19 // u0.8 | |
3176 | shari r25, 32, r25 // normalize to s2.30 | |
3177 | shlli r21, 1, r21 /* Double the index to address the 16-bit constants. */ | |
3178 | muls.l r25, r19, r19 // s2.38 | |
3179 | ldx.w r20, r21, r21 // s2.14 | |
3180 | ptabs r18, tr0 /* The table address is already in r20, so tr0 can now take the return address. */ | |
3181 | shari r19, 24, r19 // truncate to s2.14 | |
3182 | sub r21, r19, r19 // some 11 bit inverse in s1.14 | |
3183 | muls.l r19, r19, r21 // u0.28 | |
3184 | sub r63, r1, r1 /* r1 = -nsb(r5) ... */ | |
3185 | addi r1, 92, r1 /* ... + 92: the right-shift count consumed by the shard below. */ | |
3186 | muls.l r25, r21, r18 // s2.58 | |
3187 | shlli r19, 45, r19 // multiply by two and convert to s2.58 | |
3188 | /* bubble */ | |
3189 | sub r19, r18, r18 | |
3190 | shari r18, 28, r18 // some 22 bit inverse in s1.30 | |
3191 | muls.l r18, r25, r0 // s2.60 | |
3192 | muls.l r18, r4, r25 // s32.30 | |
3193 | /* bubble */ | |
3194 | shari r0, 16, r19 // s-16.44 | |
3195 | muls.l r19, r18, r19 // s-16.74 | |
3196 | shari r25, 63, r0 /* r0 = 0 or -1 according to the sign of the s32.30 product. */ | |
3197 | shari r4, 14, r18 // s19.-14 | |
3198 | shari r19, 30, r19 // s-16.44 | |
3199 | muls.l r19, r18, r19 // s15.30 | |
3200 | xor r21, r0, r21 // You could also use the constant 1 << 27. | |
3201 | add r21, r25, r21 | |
3202 | sub r21, r19, r21 | |
3203 | shard r21, r1, r21 /* Shift right by the count computed in r1. */ | |
3204 | sub r21, r0, r0 /* Result = r21 - sign mask, i.e. add one when the mask is -1. */ | |
3205 | blink tr0, r63 | |
3206 | ENDFUNC(GLOBAL(sdivsi3)) | |
3207 | /* This table has been generated by divtab.c . | |
3208 | Defects for bias -330: | |
3209 | Max defect: 6.081536e-07 at -1.000000e+00 | |
3210 | Min defect: 2.849516e-08 at 1.030651e+00 | |
3211 | Max 2nd step defect: 9.606539e-12 at -1.000000e+00 | |
3212 | Min 2nd step defect: 0.000000e+00 at 0.000000e+00 | |
3213 | Defect at 1: 1.238659e-07 | |
3214 | Defect at -2: 1.061708e-07 */ | |
3215 | #else /* ! __pic__ || ! __SHMEDIA__ */ | |
3216 | .section .rodata | |
3217 | #endif /* __pic__ */ | |
273fffd6 | 3218 | #if defined(TEXT_DATA_BUG) && defined(__pic__) && __SHMEDIA__ |
59312820 | 3219 | .balign 2 |
3220 | .type Local_div_table,@object | |
3221 | .size Local_div_table,128 | |
3222 | /* negative division constants */ | |
3223 | .word -16638 | |
3224 | .word -17135 | |
3225 | .word -17737 | |
3226 | .word -18433 | |
3227 | .word -19103 | |
3228 | .word -19751 | |
3229 | .word -20583 | |
3230 | .word -21383 | |
3231 | .word -22343 | |
3232 | .word -23353 | |
3233 | .word -24407 | |
3234 | .word -25582 | |
3235 | .word -26863 | |
3236 | .word -28382 | |
3237 | .word -29965 | |
3238 | .word -31800 | |
3239 | /* negative division factors */ | |
3240 | .byte 66 | |
3241 | .byte 70 | |
3242 | .byte 75 | |
3243 | .byte 81 | |
3244 | .byte 87 | |
3245 | .byte 93 | |
3246 | .byte 101 | |
3247 | .byte 109 | |
3248 | .byte 119 | |
3249 | .byte 130 | |
3250 | .byte 142 | |
3251 | .byte 156 | |
3252 | .byte 172 | |
3253 | .byte 192 | |
3254 | .byte 214 | |
3255 | .byte 241 | |
3256 | .skip 16 | |
3257 | Local_div_table: | |
3258 | .skip 16 | |
3259 | /* positive division factors */ | |
3260 | .byte 241 | |
3261 | .byte 214 | |
3262 | .byte 192 | |
3263 | .byte 172 | |
3264 | .byte 156 | |
3265 | .byte 142 | |
3266 | .byte 130 | |
3267 | .byte 119 | |
3268 | .byte 109 | |
3269 | .byte 101 | |
3270 | .byte 93 | |
3271 | .byte 87 | |
3272 | .byte 81 | |
3273 | .byte 75 | |
3274 | .byte 70 | |
3275 | .byte 66 | |
3276 | /* positive division constants */ | |
3277 | .word 31801 | |
3278 | .word 29966 | |
3279 | .word 28383 | |
3280 | .word 26864 | |
3281 | .word 25583 | |
3282 | .word 24408 | |
3283 | .word 23354 | |
3284 | .word 22344 | |
3285 | .word 21384 | |
3286 | .word 20584 | |
3287 | .word 19752 | |
3288 | .word 19104 | |
3289 | .word 18434 | |
3290 | .word 17738 | |
3291 | .word 17136 | |
3292 | .word 16639 | |
3293 | .section .rodata | |
3294 | #endif /* TEXT_DATA_BUG */ | |
3295 | .balign 2 | |
3296 | .type GLOBAL(div_table),@object | |
3297 | .size GLOBAL(div_table),128 | |
3298 | /* negative division constants */ | |
3299 | .word -16638 | |
3300 | .word -17135 | |
3301 | .word -17737 | |
3302 | .word -18433 | |
3303 | .word -19103 | |
3304 | .word -19751 | |
3305 | .word -20583 | |
3306 | .word -21383 | |
3307 | .word -22343 | |
3308 | .word -23353 | |
3309 | .word -24407 | |
3310 | .word -25582 | |
3311 | .word -26863 | |
3312 | .word -28382 | |
3313 | .word -29965 | |
3314 | .word -31800 | |
3315 | /* negative division factors */ | |
3316 | .byte 66 | |
3317 | .byte 70 | |
3318 | .byte 75 | |
3319 | .byte 81 | |
3320 | .byte 87 | |
3321 | .byte 93 | |
3322 | .byte 101 | |
3323 | .byte 109 | |
3324 | .byte 119 | |
3325 | .byte 130 | |
3326 | .byte 142 | |
3327 | .byte 156 | |
3328 | .byte 172 | |
3329 | .byte 192 | |
3330 | .byte 214 | |
3331 | .byte 241 | |
3332 | .skip 16 | |
3333 | .global GLOBAL(div_table) | |
3334 | GLOBAL(div_table): | |
3335 | HIDDEN_ALIAS(div_table_internal,div_table) | |
3336 | .skip 16 | |
3337 | /* positive division factors */ | |
3338 | .byte 241 | |
3339 | .byte 214 | |
3340 | .byte 192 | |
3341 | .byte 172 | |
3342 | .byte 156 | |
3343 | .byte 142 | |
3344 | .byte 130 | |
3345 | .byte 119 | |
3346 | .byte 109 | |
3347 | .byte 101 | |
3348 | .byte 93 | |
3349 | .byte 87 | |
3350 | .byte 81 | |
3351 | .byte 75 | |
3352 | .byte 70 | |
3353 | .byte 66 | |
3354 | /* positive division constants */ | |
3355 | .word 31801 | |
3356 | .word 29966 | |
3357 | .word 28383 | |
3358 | .word 26864 | |
3359 | .word 25583 | |
3360 | .word 24408 | |
3361 | .word 23354 | |
3362 | .word 22344 | |
3363 | .word 21384 | |
3364 | .word 20584 | |
3365 | .word 19752 | |
3366 | .word 19104 | |
3367 | .word 18434 | |
3368 | .word 17738 | |
3369 | .word 17136 | |
3370 | .word 16639 | |
d6005df3 | 3371 | |
9fe603c3 | 3372 | #elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) |
3373 | /* This code uses shld, thus is not suitable for SH1 / SH2. */ | |
d6005df3 | 3374 | |
3375 | /* Signed / unsigned division without use of FPU, optimized for SH4. | |
3376 | Uses a lookup table for divisors in the range -128 .. +128, and | |
3377 | div1 with case distinction for larger divisors in three more ranges. | |
3378 | The code is lumped together with the table to allow the use of mova. */ | |
3379 | #ifdef __LITTLE_ENDIAN__ | |
3380 | #define L_LSB 0 | |
3381 | #define L_LSWMSB 1 | |
3382 | #define L_MSWLSB 2 | |
3383 | #else | |
3384 | #define L_LSB 3 | |
3385 | #define L_LSWMSB 2 | |
3386 | #define L_MSWLSB 1 | |
3387 | #endif | |
3388 | ||
3389 | .balign 4 | |
3390 | .global GLOBAL(udivsi3_i4i) | |
3391 | FUNC(GLOBAL(udivsi3_i4i)) | |
3392 | GLOBAL(udivsi3_i4i): /* Unsigned division: dividend in r4, divisor in r5, quotient in r0. r4 and r5 are saved on the stack and restored; r1 is scratch. */ | |
3393 | mov.w LOCAL(c128_w), r1 /* r1 = 128, the largest divisor handled by the table path. */ | |
3394 | div0u /* Set up the flags for unsigned div1 steps. */ | |
3395 | mov r4,r0 | |
3396 | shlr8 r0 /* r0 = dividend >> 8. */ | |
3397 | cmp/hi r1,r5 /* T = divisor > 128 (unsigned). */ | |
3398 | extu.w r5,r1 /* r1 = low 16 bits of the divisor. */ | |
3399 | bf LOCAL(udiv_le128) /* Divisor <= 128: multiply by a tabulated inverse. */ | |
3400 | cmp/eq r5,r1 /* T = divisor fits in 16 bits. */ | |
3401 | bf LOCAL(udiv_ge64k) /* Divisor >= 64K. */ | |
3402 | shlr r0 | |
3403 | mov r5,r1 /* Keep the original divisor for the save below. */ | |
3404 | shll16 r5 /* Move the 16-bit divisor into the upper half for div1. */ | |
3405 | mov.l r4,@-r15 /* Save the caller's r4. */ | |
3406 | div1 r5,r0 | |
3407 | mov.l r1,@-r15 /* Save the caller's r5 (original divisor). */ | |
3408 | div1 r5,r0 | |
3409 | div1 r5,r0 | |
3410 | bra LOCAL(udiv_25) /* Continue in the div1 sequence shared with sdivsi3_i4i. */ | |
3411 | div1 r5,r0 /* Executed in the branch delay slot. */ | |
3412 | ||
3413 | LOCAL(div_le128): /* Table path entry from sdivsi3_i4i, which has already saved r4 and r5. */ | |
3414 | mova LOCAL(div_table_ix),r0 | |
3415 | bra LOCAL(div_le128_2) | |
3416 | mov.b @(r0,r5),r1 /* Delay slot: r1 = div_table_ix[divisor]. */ | |
3417 | LOCAL(udiv_le128): /* Table path entry for the unsigned routine. */ | |
3418 | mov.l r4,@-r15 /* Save the caller's r4. */ | |
3419 | mova LOCAL(div_table_ix),r0 | |
3420 | mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], a byte offset into the inverse table. */ | |
3421 | mov.l r5,@-r15 /* Save the caller's r5. */ | |
3422 | LOCAL(div_le128_2): | |
3423 | mova LOCAL(div_table_inv),r0 | |
3424 | mov.l @(r0,r1),r1 /* r1 = 32-bit inverse selected by that offset (div_table_inv is defined outside this excerpt). */ | |
3425 | mov r5,r0 | |
3426 | tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1. */ | |
3427 | mova LOCAL(div_table_clz),r0 | |
3428 | dmulu.l r1,r4 /* mach:macl = inverse * dividend (unsigned 64-bit product). */ | |
3429 | mov.b @(r0,r5),r1 /* r1 = shift count from div_table_clz[divisor]. */ | |
3430 | bt/s LOCAL(div_by_1) | |
3431 | mov r4,r0 /* Delay slot: r0 = dividend, the quotient when dividing by 1. */ | |
3432 | mov.l @r15+,r5 /* Restore r5. */ | |
3433 | sts mach,r0 /* r0 = upper 32 bits of the product. */ | |
3434 | /* clrt */ | |
3435 | addc r4,r0 /* Add the dividend, carry out in T; T is known clear here because the bt/s above was not taken. */ | |
3436 | mov.l @r15+,r4 /* Restore r4. */ | |
3437 | rotcr r0 /* Rotate the carry back in as bit 31. */ | |
3438 | rts | |
3439 | shld r1,r0 /* Delay slot: apply the tabulated shift count. */ | |
3440 | ||
3441 | LOCAL(div_by_1_neg): /* Division by 1 with a negative result. */ | |
3442 | neg r4,r0 | |
3443 | LOCAL(div_by_1): /* r0 already holds the quotient: restore registers and return. */ | |
3444 | mov.l @r15+,r5 | |
3445 | rts | |
3446 | mov.l @r15+,r4 /* Delay slot. */ | |
3447 | ||
3448 | LOCAL(div_ge64k): /* Entry from sdivsi3_i4i for divisors >= 64K; T = divisor > dividend >> 8. */ | |
3449 | bt/s LOCAL(div_r8) | |
3450 | div0u /* Delay slot. */ | |
3451 | shll8 r5 | |
3452 | bra LOCAL(div_ge64k_2) | |
3453 | div1 r5,r0 /* Delay slot. */ | |
3454 | LOCAL(udiv_ge64k): /* Unsigned entry for divisors >= 64K. */ | |
3455 | cmp/hi r0,r5 /* T = divisor > dividend >> 8. */ | |
3456 | mov r5,r1 /* Keep the original divisor for the save below. */ | |
3457 | bt LOCAL(udiv_r8) | |
3458 | shll8 r5 | |
3459 | mov.l r4,@-r15 /* Save the caller's r4. */ | |
3460 | div1 r5,r0 | |
3461 | mov.l r1,@-r15 /* Save the caller's r5. */ | |
3462 | LOCAL(div_ge64k_2): | |
3463 | div1 r5,r0 | |
3464 | mov.l LOCAL(zero_l),r1 /* zero_l is defined outside this excerpt. */ | |
3465 | .rept 4 | |
3466 | div1 r5,r0 | |
3467 | .endr | |
3468 | mov.l r1,@-r15 /* Push a scratch word in which quotient bytes are assembled. */ | |
3469 | div1 r5,r0 | |
3470 | mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended by mov.w. */ | |
3471 | div1 r5,r0 | |
3472 | mov.b r0,@(L_LSWMSB,r15) /* Store the low byte of r0 into the scratch word. */ | |
3473 | xor r4,r0 /* xor/and/xor: take the low 8 bits from r4 and the remaining bits from r0. */ | |
3474 | and r1,r0 | |
3475 | bra LOCAL(div_ge64k_end) /* Finish in the tail shared with sdivsi3_i4i. */ | |
3476 | xor r4,r0 /* Delay slot. */ | |
3477 | ||
3478 | LOCAL(div_r8): /* Entry from sdivsi3_i4i when divisor > dividend >> 8. */ | |
3479 | shll16 r4 | |
3480 | bra LOCAL(div_r8_2) | |
3481 | shll8 r4 /* Delay slot: r4 has now been shifted left by 24 in total. */ | |
3482 | LOCAL(udiv_r8): /* Unsigned entry for the same case. */ | |
3483 | mov.l r4,@-r15 /* Save the caller's r4. */ | |
3484 | shll16 r4 | |
3485 | clrt | |
3486 | shll8 r4 | |
3487 | mov.l r5,@-r15 /* Save the caller's r5. */ | |
3488 | LOCAL(div_r8_2): /* Eight div1 steps in r1, with rotcl on r0 moving T between steps. */ | |
3489 | rotcl r4 | |
3490 | mov r0,r1 | |
3491 | div1 r5,r1 | |
3492 | mov r4,r0 | |
3493 | rotcl r0 | |
3494 | mov r5,r4 /* Copy the divisor to r4 so that r5 can be restored before the last div1. */ | |
3495 | div1 r5,r1 | |
3496 | .rept 5 | |
3497 | rotcl r0; div1 r5,r1 | |
3498 | .endr | |
3499 | rotcl r0 | |
3500 | mov.l @r15+,r5 /* Restore r5. */ | |
3501 | div1 r4,r1 | |
3502 | mov.l @r15+,r4 /* Restore r4. */ | |
3503 | rts | |
3504 | rotcl r0 /* Delay slot: shift in the final quotient bit. */ | |
3505 | ||
3506 | ENDFUNC(GLOBAL(udivsi3_i4i)) | |
3507 | ||
3508 | .global GLOBAL(sdivsi3_i4i) | |
3509 | FUNC(GLOBAL(sdivsi3_i4i)) | |
3510 | /* This is link-compatible with a GLOBAL(sdivsi3) call, | |
3511 | but we effectively clobber only r1. Dividend in r4, divisor in r5, quotient in r0; r4 and r5 are saved on the stack and restored. */ | |
3512 | GLOBAL(sdivsi3_i4i): | |
3513 | mov.l r4,@-r15 /* Save the caller's r4. */ | |
3514 | cmp/pz r5 /* T = divisor >= 0. */ | |
3515 | mov.w LOCAL(c128_w), r1 /* r1 = 128, the largest divisor handled by the table path. */ | |
3516 | bt/s LOCAL(pos_divisor) | |
3517 | cmp/pz r4 /* Delay slot: T = dividend >= 0. */ | |
3518 | mov.l r5,@-r15 /* Negative divisor: save the caller's r5 ... */ | |
3519 | neg r5,r5 /* ... and continue with its negation. */ | |
3520 | bt/s LOCAL(neg_result) /* Dividend >= 0 with a negative divisor: the quotient is negated at the end. */ | |
3521 | cmp/hi r1,r5 /* Delay slot: T = negated divisor > 128 (unsigned). */ | |
3522 | neg r4,r4 /* Both operands negative: negate the dividend too. */ | |
3523 | LOCAL(pos_result): /* Operands have been made non-negative; no final negation. */ | |
3524 | extu.w r5,r0 /* r0 = low 16 bits of the divisor. */ | |
3525 | bf LOCAL(div_le128) /* Divisor <= 128: table path inside udivsi3_i4i. */ | |
3526 | cmp/eq r5,r0 /* T = divisor fits in 16 bits. */ | |
3527 | mov r4,r0 | |
3528 | shlr8 r0 /* r0 = dividend >> 8. */ | |
3529 | bf/s LOCAL(div_ge64k) /* Divisor >= 64K. */ | |
3530 | cmp/hi r0,r5 /* Delay slot: T = divisor > dividend >> 8. */ | |
3531 | div0u | |
3532 | shll16 r5 /* Move the 16-bit divisor into the upper half for div1. */ | |
3533 | div1 r5,r0 | |
3534 | div1 r5,r0 | |
3535 | div1 r5,r0 | |
3536 | LOCAL(udiv_25): /* Also entered from udivsi3_i4i. */ | |
3537 | mov.l LOCAL(zero_l),r1 /* zero_l is defined outside this excerpt. */ | |
3538 | div1 r5,r0 | |
3539 | div1 r5,r0 | |
3540 | mov.l r1,@-r15 /* Push a scratch word in which quotient bytes are assembled. */ | |
3541 | .rept 3 | |
3542 | div1 r5,r0 | |
3543 | .endr | |
3544 | mov.b r0,@(L_MSWLSB,r15) /* Store the low byte of r0 into the scratch word. */ | |
3545 | xtrct r4,r0 | |
3546 | swap.w r0,r0 | |
3547 | .rept 8 | |
3548 | div1 r5,r0 | |
3549 | .endr | |
3550 | mov.b r0,@(L_LSWMSB,r15) /* Store the next byte into the scratch word. */ | |
3551 | LOCAL(div_ge64k_end): /* Shared tail: the last eight div1 steps, then combine with the scratch word. */ | |
3552 | .rept 8 | |
3553 | div1 r5,r0 | |
3554 | .endr | |
3555 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
3556 | extu.b r0,r0 /* Keep only the low 8 bits of r0. */ | |
3557 | mov.l @r15+,r5 /* Restore r5. */ | |
3558 | or r4,r0 /* Merge in the bytes collected in the scratch word. */ | |
3559 | mov.l @r15+,r4 /* Restore r4. */ | |
3560 | rts | |
3561 | rotcl r0 /* Delay slot: shift in the final quotient bit. */ | |
3562 | ||
3563 | LOCAL(div_le128_neg): /* Table path with a negated result; r0 holds the zero-extended divisor on entry. */ | |
3564 | tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1. */ | |
3565 | mova LOCAL(div_table_ix),r0 | |
3566 | mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor]. */ | |
3567 | mova LOCAL(div_table_inv),r0 | |
3568 | bt/s LOCAL(div_by_1_neg) | |
3569 | mov.l @(r0,r1),r1 /* Delay slot: r1 = tabulated inverse. */ | |
3570 | mova LOCAL(div_table_clz),r0 | |
3571 | dmulu.l r1,r4 /* mach:macl = inverse * dividend. */ | |
3572 | mov.b @(r0,r5),r1 /* r1 = shift count from div_table_clz[divisor]. */ | |
3573 | mov.l @r15+,r5 /* Restore r5. */ | |
3574 | sts mach,r0 | |
3575 | /* clrt */ | |
3576 | addc r4,r0 /* Add the dividend, carry out in T; T is known clear here because the bt/s above was not taken. */ | |
3577 | mov.l @r15+,r4 /* Restore r4. */ | |
3578 | rotcr r0 /* Rotate the carry back in as bit 31. */ | |
3579 | shld r1,r0 /* Apply the tabulated shift count. */ | |
3580 | rts | |
3581 | neg r0,r0 /* Delay slot: negate the quotient. */ | |
3582 | ||
3583 | LOCAL(pos_divisor): /* Divisor >= 0; T still reflects dividend >= 0. */ | |
3584 | mov.l r5,@-r15 /* Save the caller's r5. */ | |
3585 | bt/s LOCAL(pos_result) | |
3586 | cmp/hi r1,r5 /* Delay slot: T = divisor > 128 (unsigned). */ | |
3587 | neg r4,r4 /* Negative dividend: negate it; the quotient is negated at the end. */ | |
3588 | LOCAL(neg_result): /* Same case split as pos_result, ending with a negation. */ | |
3589 | extu.w r5,r0 | |
3590 | bf LOCAL(div_le128_neg) | |
3591 | cmp/eq r5,r0 | |
3592 | mov r4,r0 | |
3593 | shlr8 r0 | |
3594 | bf/s LOCAL(div_ge64k_neg) | |
3595 | cmp/hi r0,r5 /* Delay slot: T = divisor > dividend >> 8. */ | |
3596 | div0u | |
3597 | mov.l LOCAL(zero_l),r1 | |
3598 | shll16 r5 | |
3599 | div1 r5,r0 | |
3600 | mov.l r1,@-r15 /* Push the scratch word. */ | |
3601 | .rept 7 | |
3602 | div1 r5,r0 | |
3603 | .endr | |
3604 | mov.b r0,@(L_MSWLSB,r15) | |
3605 | xtrct r4,r0 | |
3606 | swap.w r0,r0 | |
3607 | .rept 8 | |
3608 | div1 r5,r0 | |
3609 | .endr | |
3610 | mov.b r0,@(L_LSWMSB,r15) | |
3611 | LOCAL(div_ge64k_neg_end): | |
3612 | .rept 8 | |
3613 | div1 r5,r0 | |
3614 | .endr | |
3615 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
3616 | extu.b r0,r1 /* Here the quotient is assembled in r1 so that the final neg can write r0. */ | |
3617 | mov.l @r15+,r5 /* Restore r5. */ | |
3618 | or r4,r1 | |
3619 | LOCAL(div_r8_neg_end): | |
3620 | mov.l @r15+,r4 /* Restore r4. */ | |
3621 | rotcl r1 /* Shift in the final quotient bit. */ | |
3622 | rts | |
3623 | neg r1,r0 /* Delay slot: r0 = negated quotient. */ | |
3624 | ||
3625 | LOCAL(div_ge64k_neg): /* Divisor >= 64K, negated result; T = divisor > dividend >> 8. */ | |
3626 | bt/s LOCAL(div_r8_neg) | |
3627 | div0u /* Delay slot. */ | |
3628 | shll8 r5 | |
3629 | mov.l LOCAL(zero_l),r1 | |
3630 | .rept 6 | |
3631 | div1 r5,r0 | |
3632 | .endr | |
3633 | mov.l r1,@-r15 /* Push the scratch word. */ | |
3634 | div1 r5,r0 | |
3635 | mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended by mov.w. */ | |
3636 | div1 r5,r0 | |
3637 | mov.b r0,@(L_LSWMSB,r15) | |
3638 | xor r4,r0 /* xor/and/xor: take the low 8 bits from r4 and the remaining bits from r0. */ | |
3639 | and r1,r0 | |
3640 | bra LOCAL(div_ge64k_neg_end) | |
3641 | xor r4,r0 /* Delay slot. */ | |
3642 | ||
3643 | LOCAL(c128_w): /* 16-bit constant loaded with mov.w by both entry points. */ | |
3644 | .word 128 | |
3645 | ||
3646 | LOCAL(div_r8_neg): /* Divisor > dividend >> 8, negated result: eight div1 steps in r0, with rotcl on r1. */ | |
3647 | clrt | |
3648 | shll16 r4 | |
3649 | mov r4,r1 | |
3650 | shll8 r1 | |
3651 | mov r5,r4 /* Copy the divisor to r4 so that r5 can be restored before the last div1. */ | |
3652 | .rept 7 | |
3653 | rotcl r1; div1 r5,r0 | |
3654 | .endr | |
3655 | mov.l @r15+,r5 /* Restore r5. */ | |
3656 | rotcl r1 | |
3657 | bra LOCAL(div_r8_neg_end) | |
3658 | div1 r4,r0 /* Delay slot: eighth div1 step. */ | |
3659 | ||
3660 | LOCAL(m256_w): /* 16-bit mask constant used by the >= 64K paths. */ | |
3661 | .word 0xff00 | |
3662 | /* This table has been generated by divtab-sh4.c. */ | |
3663 | .balign 4 | |
3664 | LOCAL(div_table_clz): | |
3665 | .byte 0 | |
3666 | .byte 1 | |
3667 | .byte 0 | |
3668 | .byte -1 | |
3669 | .byte -1 | |
3670 | .byte -2 | |
3671 | .byte -2 | |
3672 | .byte -2 | |
3673 | .byte -2 | |
3674 | .byte -3 | |
3675 | .byte -3 | |
3676 | .byte -3 | |
3677 | .byte -3 | |
3678 | .byte -3 | |
3679 | .byte -3 | |
3680 | .byte -3 | |
3681 | .byte -3 | |
3682 | .byte -4 | |
3683 | .byte -4 | |
3684 | .byte -4 | |
3685 | .byte -4 | |
3686 | .byte -4 | |
3687 | .byte -4 | |
3688 | .byte -4 | |
3689 | .byte -4 | |
3690 | .byte -4 | |
3691 | .byte -4 | |
3692 | .byte -4 | |
3693 | .byte -4 | |
3694 | .byte -4 | |
3695 | .byte -4 | |
3696 | .byte -4 | |
3697 | .byte -4 | |
3698 | .byte -5 | |
3699 | .byte -5 | |
3700 | .byte -5 | |
3701 | .byte -5 | |
3702 | .byte -5 | |
3703 | .byte -5 | |
3704 | .byte -5 | |
3705 | .byte -5 | |
3706 | .byte -5 | |
3707 | .byte -5 | |
3708 | .byte -5 | |
3709 | .byte -5 | |
3710 | .byte -5 | |
3711 | .byte -5 | |
3712 | .byte -5 | |
3713 | .byte -5 | |
3714 | .byte -5 | |
3715 | .byte -5 | |
3716 | .byte -5 | |
3717 | .byte -5 | |
3718 | .byte -5 | |
3719 | .byte -5 | |
3720 | .byte -5 | |
3721 | .byte -5 | |
3722 | .byte -5 | |
3723 | .byte -5 | |
3724 | .byte -5 | |
3725 | .byte -5 | |
3726 | .byte -5 | |
3727 | .byte -5 | |
3728 | .byte -5 | |
3729 | .byte -5 | |
3730 | .byte -6 | |
3731 | .byte -6 | |
3732 | .byte -6 | |
3733 | .byte -6 | |
3734 | .byte -6 | |
3735 | .byte -6 | |
3736 | .byte -6 | |
3737 | .byte -6 | |
3738 | .byte -6 | |
3739 | .byte -6 | |
3740 | .byte -6 | |
3741 | .byte -6 | |
3742 | .byte -6 | |
3743 | .byte -6 | |
3744 | .byte -6 | |
3745 | .byte -6 | |
3746 | .byte -6 | |
3747 | .byte -6 | |
3748 | .byte -6 | |
3749 | .byte -6 | |
3750 | .byte -6 | |
3751 | .byte -6 | |
3752 | .byte -6 | |
3753 | .byte -6 | |
3754 | .byte -6 | |
3755 | .byte -6 | |
3756 | .byte -6 | |
3757 | .byte -6 | |
3758 | .byte -6 | |
3759 | .byte -6 | |
3760 | .byte -6 | |
3761 | .byte -6 | |
3762 | .byte -6 | |
3763 | .byte -6 | |
3764 | .byte -6 | |
3765 | .byte -6 | |
3766 | .byte -6 | |
3767 | .byte -6 | |
3768 | .byte -6 | |
3769 | .byte -6 | |
3770 | .byte -6 | |
3771 | .byte -6 | |
3772 | .byte -6 | |
3773 | .byte -6 | |
3774 | .byte -6 | |
3775 | .byte -6 | |
3776 | .byte -6 | |
3777 | .byte -6 | |
3778 | .byte -6 | |
3779 | .byte -6 | |
3780 | .byte -6 | |
3781 | .byte -6 | |
3782 | .byte -6 | |
3783 | .byte -6 | |
3784 | .byte -6 | |
3785 | .byte -6 | |
3786 | .byte -6 | |
3787 | .byte -6 | |
3788 | .byte -6 | |
3789 | .byte -6 | |
3790 | .byte -6 | |
3791 | .byte -6 | |
3792 | .byte -6 | |
3793 | /* Lookup table translating positive divisor to index into table of | |
3794 | normalized inverse. N.B. the '0' entry is also the last entry of the | |
3795 | previous table, and causes an unaligned access for division by zero. */ | |
3796 | LOCAL(div_table_ix): | |
3797 | .byte -6 | |
3798 | .byte -128 | |
3799 | .byte -128 | |
3800 | .byte 0 | |
3801 | .byte -128 | |
3802 | .byte -64 | |
3803 | .byte 0 | |
3804 | .byte 64 | |
3805 | .byte -128 | |
3806 | .byte -96 | |
3807 | .byte -64 | |
3808 | .byte -32 | |
3809 | .byte 0 | |
3810 | .byte 32 | |
3811 | .byte 64 | |
3812 | .byte 96 | |
3813 | .byte -128 | |
3814 | .byte -112 | |
3815 | .byte -96 | |
3816 | .byte -80 | |
3817 | .byte -64 | |
3818 | .byte -48 | |
3819 | .byte -32 | |
3820 | .byte -16 | |
3821 | .byte 0 | |
3822 | .byte 16 | |
3823 | .byte 32 | |
3824 | .byte 48 | |
3825 | .byte 64 | |
3826 | .byte 80 | |
3827 | .byte 96 | |
3828 | .byte 112 | |
3829 | .byte -128 | |
3830 | .byte -120 | |
3831 | .byte -112 | |
3832 | .byte -104 | |
3833 | .byte -96 | |
3834 | .byte -88 | |
3835 | .byte -80 | |
3836 | .byte -72 | |
3837 | .byte -64 | |
3838 | .byte -56 | |
3839 | .byte -48 | |
3840 | .byte -40 | |
3841 | .byte -32 | |
3842 | .byte -24 | |
3843 | .byte -16 | |
3844 | .byte -8 | |
3845 | .byte 0 | |
3846 | .byte 8 | |
3847 | .byte 16 | |
3848 | .byte 24 | |
3849 | .byte 32 | |
3850 | .byte 40 | |
3851 | .byte 48 | |
3852 | .byte 56 | |
3853 | .byte 64 | |
3854 | .byte 72 | |
3855 | .byte 80 | |
3856 | .byte 88 | |
3857 | .byte 96 | |
3858 | .byte 104 | |
3859 | .byte 112 | |
3860 | .byte 120 | |
3861 | .byte -128 | |
3862 | .byte -124 | |
3863 | .byte -120 | |
3864 | .byte -116 | |
3865 | .byte -112 | |
3866 | .byte -108 | |
3867 | .byte -104 | |
3868 | .byte -100 | |
3869 | .byte -96 | |
3870 | .byte -92 | |
3871 | .byte -88 | |
3872 | .byte -84 | |
3873 | .byte -80 | |
3874 | .byte -76 | |
3875 | .byte -72 | |
3876 | .byte -68 | |
3877 | .byte -64 | |
3878 | .byte -60 | |
3879 | .byte -56 | |
3880 | .byte -52 | |
3881 | .byte -48 | |
3882 | .byte -44 | |
3883 | .byte -40 | |
3884 | .byte -36 | |
3885 | .byte -32 | |
3886 | .byte -28 | |
3887 | .byte -24 | |
3888 | .byte -20 | |
3889 | .byte -16 | |
3890 | .byte -12 | |
3891 | .byte -8 | |
3892 | .byte -4 | |
3893 | .byte 0 | |
3894 | .byte 4 | |
3895 | .byte 8 | |
3896 | .byte 12 | |
3897 | .byte 16 | |
3898 | .byte 20 | |
3899 | .byte 24 | |
3900 | .byte 28 | |
3901 | .byte 32 | |
3902 | .byte 36 | |
3903 | .byte 40 | |
3904 | .byte 44 | |
3905 | .byte 48 | |
3906 | .byte 52 | |
3907 | .byte 56 | |
3908 | .byte 60 | |
3909 | .byte 64 | |
3910 | .byte 68 | |
3911 | .byte 72 | |
3912 | .byte 76 | |
3913 | .byte 80 | |
3914 | .byte 84 | |
3915 | .byte 88 | |
3916 | .byte 92 | |
3917 | .byte 96 | |
3918 | .byte 100 | |
3919 | .byte 104 | |
3920 | .byte 108 | |
3921 | .byte 112 | |
3922 | .byte 116 | |
3923 | .byte 120 | |
3924 | .byte 124 | |
3925 | .byte -128 | |
3926 | /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ | |
3927 | .balign 4 | |
3928 | LOCAL(zero_l): | |
3929 | .long 0x0 | |
3930 | .long 0xF81F81F9 | |
3931 | .long 0xF07C1F08 | |
3932 | .long 0xE9131AC0 | |
3933 | .long 0xE1E1E1E2 | |
3934 | .long 0xDAE6076C | |
3935 | .long 0xD41D41D5 | |
3936 | .long 0xCD856891 | |
3937 | .long 0xC71C71C8 | |
3938 | .long 0xC0E07039 | |
3939 | .long 0xBACF914D | |
3940 | .long 0xB4E81B4F | |
3941 | .long 0xAF286BCB | |
3942 | .long 0xA98EF607 | |
3943 | .long 0xA41A41A5 | |
3944 | .long 0x9EC8E952 | |
3945 | .long 0x9999999A | |
3946 | .long 0x948B0FCE | |
3947 | .long 0x8F9C18FA | |
3948 | .long 0x8ACB90F7 | |
3949 | .long 0x86186187 | |
3950 | .long 0x81818182 | |
3951 | .long 0x7D05F418 | |
3952 | .long 0x78A4C818 | |
3953 | .long 0x745D1746 | |
3954 | .long 0x702E05C1 | |
3955 | .long 0x6C16C16D | |
3956 | .long 0x68168169 | |
3957 | .long 0x642C8591 | |
3958 | .long 0x60581606 | |
3959 | .long 0x5C9882BA | |
3960 | .long 0x58ED2309 | |
3961 | LOCAL(div_table_inv): | |
3962 | .long 0x55555556 | |
3963 | .long 0x51D07EAF | |
3964 | .long 0x4E5E0A73 | |
3965 | .long 0x4AFD6A06 | |
3966 | .long 0x47AE147B | |
3967 | .long 0x446F8657 | |
3968 | .long 0x41414142 | |
3969 | .long 0x3E22CBCF | |
3970 | .long 0x3B13B13C | |
3971 | .long 0x38138139 | |
3972 | .long 0x3521CFB3 | |
3973 | .long 0x323E34A3 | |
3974 | .long 0x2F684BDB | |
3975 | .long 0x2C9FB4D9 | |
3976 | .long 0x29E4129F | |
3977 | .long 0x27350B89 | |
3978 | .long 0x24924925 | |
3979 | .long 0x21FB7813 | |
3980 | .long 0x1F7047DD | |
3981 | .long 0x1CF06ADB | |
3982 | .long 0x1A7B9612 | |
3983 | .long 0x18118119 | |
3984 | .long 0x15B1E5F8 | |
3985 | .long 0x135C8114 | |
3986 | .long 0x11111112 | |
3987 | .long 0xECF56BF | |
3988 | .long 0xC9714FC | |
3989 | .long 0xA6810A7 | |
3990 | .long 0x8421085 | |
3991 | .long 0x624DD30 | |
3992 | .long 0x4104105 | |
3993 | .long 0x2040811 | |
3994 | /* maximum error: 0.987342 scaled: 0.921875*/ | |
3995 | ||
3996 | ENDFUNC(GLOBAL(sdivsi3_i4i)) | |
3997 | #endif /* SH3 / SH4 */ | |
3998 | ||
59312820 | 3999 | #endif /* L_div_table */ |
2b2f5cfb | 4000 | |
4001 | #ifdef L_udiv_qrnnd_16 | |
4002 | #if !__SHMEDIA__ | |
4003 | HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) | |
/* udiv_qrnnd_16: one 16-bit quotient digit of an unsigned division.
   Register roles, as given by the original comment just below:
     in:      r0 = n1, r4 = n0, r5 = d, r6 = d1
     out:     r0 = rn (remainder), r1 = qn (quotient digit)
     scratch: r2 = __m (product of the quotient estimate and the low
              16 bits of d, see the mulu.w below)
   Only the low 16 bits of r4 are merged into the working remainder.
   NOTE(review): the names look like those used by __udiv_qrnnd_c in
   longlong.h, and r6 presumably holds the high half of d positioned in
   the upper 16 bits as the 16-step div1 sequence requires -- confirm
   against the callers.
   HIDDEN_FUNC, GLOBAL, ENDFUNC and SL1 are macros defined outside this
   chunk (presumably in lib1funcs.h).  */
4004 | /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ | |
4005 | /* n1 < d, but n1 might be larger than d1. */ | |
4006 | .global GLOBAL(udiv_qrnnd_16) | |
4007 | .balign 8 | |
4008 | GLOBAL(udiv_qrnnd_16): | |
/* Set up an unsigned division (div0u clears the M, Q and T flags).  */
4009 | div0u | |
/* T = (r0 > r6), unsigned.  If n1 exceeds d1 take the .Lots path, which
   starts from a quotient estimate of 0xffff instead of dividing
   (presumably because the digit would not fit in 16 bits).  */
4010 | cmp/hi r6,r0 | |
4011 | bt .Lots | |
/* Sixteen single-bit division steps of r0 by r6.  */
4012 | .rept 16 | |
4013 | div1 r6,r0 | |
4014 | .endr | |
/* r1 = low 16 bits of r0.  extu.w and add leave T alone, so the T
   produced by the last div1 is still live for the bt and the rotcl.  */
4015 | extu.w r0,r1 | |
/* If the last step left T clear, add the divisor back to r0 -- this
   looks like the usual remainder-restoring fixup after div1; confirm
   against the SH manual.  */
4016 | bt 0f | |
4017 | add r6,r0 | |
/* Shift T into r1 as the lowest quotient bit.  */
4018 | 0: rotcl r1 | |
/* MACL = (low 16 bits of r1) * (low 16 bits of r5); fetched into r2
   (__m) by the sts below.  */
4019 | mulu.w r1,r5 | |
/* xtrct followed by swap.w leaves
     r0 = (old r0 & 0xffff0000) | (r4 & 0xffff),
   i.e. the upper half of r0 is kept and the low 16 bits of r4 are
   brought in below it.  */
4020 | xtrct r4,r0 | |
4021 | swap.w r0,r0 | |
4022 | sts macl,r2 | |
/* T = (r0 >= r2), unsigned; sub does not modify T.  If there was no
   borrow the estimate is exact: return via the final "0:" label.  */
4023 | cmp/hs r2,r0 | |
4024 | sub r2,r0 | |
4025 | bt 0f | |
/* Borrow: the estimate was too large.  T is 0 here (the bt above fell
   through), so addc adds exactly d and sets T to the carry out.
   Decrement the quotient (add #imm leaves T alone); if the addition
   carried, the remainder is back in range and we are done.  */
4026 | addc r5,r0 | |
4027 | add #-1,r1 | |
4028 | bt 0f | |
/* Still too large: decrement the quotient a second time and add d once
   more.  The add r5,r0 executes in the delay slot of rts.  */
4029 | 1: add #-1,r1 | |
4030 | rts | |
4031 | add r5,r0 | |
4032 | .balign 8 | |
/* .Lots: reached when n1 > d1 (see cmp/hi above).  */
4033 | .Lots: | |
/* r0 = n1 - d, then
     r1 = (r0 << 16) | (r4 & 0xffff)
   via swap.w + xtrct; this becomes the working remainder in r0.  */
4034 | sub r5,r0 | |
4035 | swap.w r4,r1 | |
4036 | xtrct r0,r1 | |
/* Clear T so that addc adds exactly d; T then holds the carry out.  */
4037 | clrt | |
4038 | mov r1,r0 | |
4039 | addc r5,r0 | |
/* r1 = -1; shlr16 turns it into 0x0000ffff, the quotient estimate.
   Neither instruction changes T.  SL1 is defined outside this chunk;
   it presumably emits "bf 1b" together with shlr16 r1 (in the branch
   delay slot where the CPU has one) -- confirm in lib1funcs.h.  If the
   addc did not carry, branch back to 1: for one more
   quotient-decrement / add-d correction.  */
4040 | mov #-1,r1 | |
4041 | SL1(bf, 1b, | |
4042 | shlr16 r1) | |
/* Common return: r0 = remainder, r1 = quotient.  nop fills the rts
   delay slot.  */
4043 | 0: rts | |
4044 | nop | |
4045 | ENDFUNC(GLOBAL(udiv_qrnnd_16)) | |
4046 | #endif /* !__SHMEDIA__ */ | |
4047 | #endif /* L_udiv_qrnnd_16 */ | |