]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/pa/milli64.S
Enable no-exec stacks for more targets using the Linux kernel.
[thirdparty/gcc.git] / libgcc / config / pa / milli64.S
CommitLineData
fe660a1a 1/* 32 and 64-bit millicode, original author Hewlett-Packard
48bd7758 2 adapted for gcc by Paul Bame <bame@debian.org>
fe660a1a 3 and Alan Modra <alan@linuxcare.com.au>.
48bd7758 4
cbe34bb5 5 Copyright (C) 2001-2017 Free Software Foundation, Inc.
48bd7758 6
0af0580f 7This file is part of GCC.
48bd7758 8
0af0580f
JDA
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
748086b7 11Software Foundation; either version 3, or (at your option) any later
0af0580f
JDA
12version.
13
0af0580f
JDA
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
748086b7
JJ
19Under Section 7 of GPL version 3, you are granted additional
20permissions described in the GCC Runtime Library Exception, version
213.1, as published by the Free Software Foundation.
22
23You should have received a copy of the GNU General Public License and
24a copy of the GCC Runtime Library Exception along with this program;
25see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
26<http://www.gnu.org/licenses/>. */
48bd7758 27
938b6f1e
JM
28/* An executable stack is *not* required for these functions. */
29#if defined(__ELF__) && defined(__linux__)
30.section .note.GNU-stack,"",%progbits
31.previous
32#endif
33
48bd7758
AM
34#ifdef pa64
35 .level 2.0w
36#endif
37
38/* Hardware General Registers. */
39r0: .reg %r0
40r1: .reg %r1
41r2: .reg %r2
42r3: .reg %r3
43r4: .reg %r4
44r5: .reg %r5
45r6: .reg %r6
46r7: .reg %r7
47r8: .reg %r8
48r9: .reg %r9
49r10: .reg %r10
50r11: .reg %r11
51r12: .reg %r12
52r13: .reg %r13
53r14: .reg %r14
54r15: .reg %r15
55r16: .reg %r16
56r17: .reg %r17
57r18: .reg %r18
58r19: .reg %r19
59r20: .reg %r20
60r21: .reg %r21
61r22: .reg %r22
62r23: .reg %r23
63r24: .reg %r24
64r25: .reg %r25
65r26: .reg %r26
66r27: .reg %r27
67r28: .reg %r28
68r29: .reg %r29
69r30: .reg %r30
70r31: .reg %r31
71
72/* Hardware Space Registers. */
73sr0: .reg %sr0
74sr1: .reg %sr1
75sr2: .reg %sr2
76sr3: .reg %sr3
77sr4: .reg %sr4
78sr5: .reg %sr5
79sr6: .reg %sr6
80sr7: .reg %sr7
81
82/* Hardware Floating Point Registers. */
83fr0: .reg %fr0
84fr1: .reg %fr1
85fr2: .reg %fr2
86fr3: .reg %fr3
87fr4: .reg %fr4
88fr5: .reg %fr5
89fr6: .reg %fr6
90fr7: .reg %fr7
91fr8: .reg %fr8
92fr9: .reg %fr9
93fr10: .reg %fr10
94fr11: .reg %fr11
95fr12: .reg %fr12
96fr13: .reg %fr13
97fr14: .reg %fr14
98fr15: .reg %fr15
99
100/* Hardware Control Registers. */
101cr11: .reg %cr11
102sar: .reg %cr11 /* Shift Amount Register */
103
104/* Software Architecture General Registers. */
105rp: .reg r2 /* return pointer */
106#ifdef pa64
107mrp: .reg r2 /* millicode return pointer */
108#else
109mrp: .reg r31 /* millicode return pointer */
110#endif
111ret0: .reg r28 /* return value */
112ret1: .reg r29 /* return value (high part of double) */
113sp: .reg r30 /* stack pointer */
114dp: .reg r27 /* data pointer */
115arg0: .reg r26 /* argument */
116arg1: .reg r25 /* argument or high part of double argument */
117arg2: .reg r24 /* argument */
118arg3: .reg r23 /* argument or high part of double argument */
119
120/* Software Architecture Space Registers. */
121/* sr0 ; return link from BLE */
122sret: .reg sr1 /* return value */
123sarg: .reg sr1 /* argument */
124/* sr4 ; PC SPACE tracker */
125/* sr5 ; process private data */
126
127/* Frame Offsets (millicode convention!) Used when calling other
128 millicode routines. Stack unwinding is dependent upon these
129 definitions. */
130r31_slot: .equ -20 /* "current RP" slot */
131sr0_slot: .equ -16 /* "static link" slot */
132#if defined(pa64)
133mrp_slot: .equ -16 /* "current RP" slot */
134psp_slot: .equ -8 /* "previous SP" slot */
135#else
136mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
137#endif
138
139
140#define DEFINE(name,value)name: .EQU value
141#define RDEFINE(name,value)name: .REG value
142#ifdef milliext
143#define MILLI_BE(lbl) BE lbl(sr7,r0)
144#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
145#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
146#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
147#define MILLIRETN BE,n 0(sr0,mrp)
148#define MILLIRET BE 0(sr0,mrp)
149#define MILLI_RETN BE,n 0(sr0,mrp)
150#define MILLI_RET BE 0(sr0,mrp)
151#else
152#define MILLI_BE(lbl) B lbl
153#define MILLI_BEN(lbl) B,n lbl
154#define MILLI_BLE(lbl) BL lbl,mrp
155#define MILLI_BLEN(lbl) BL,n lbl,mrp
156#define MILLIRETN BV,n 0(mrp)
157#define MILLIRET BV 0(mrp)
158#define MILLI_RETN BV,n 0(mrp)
159#define MILLI_RET BV 0(mrp)
160#endif
161
162#ifdef __STDC__
163#define CAT(a,b) a##b
164#else
165#define CAT(a,b) a/**/b
166#endif
167
168#ifdef ELF
169#define SUBSPA_MILLI .section .text
170#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
171#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
172#define ATTR_MILLI
173#define SUBSPA_DATA .section .data
174#define ATTR_DATA
175#define GLOBAL $global$
176#define GSYM(sym) !sym:
177#define LSYM(sym) !CAT(.L,sym:)
178#define LREF(sym) CAT(.L,sym)
179
180#else
181
182#ifdef coff
183/* This used to be .milli but since link32 places different named
184 sections in different segments millicode ends up a long ways away
185 from .text (1meg?). This way they will be a lot closer.
186
187 The SUBSPA_MILLI_* specify locality sets for certain millicode
188 modules in order to ensure that modules that call one another are
189 placed close together. Without locality sets this is unlikely to
190 happen because of the Dynamite linker library search algorithm. We
191 want these modules close together so that short calls always reach
192 (we don't want to require long calls or use long call stubs). */
193
194#define SUBSPA_MILLI .subspa .text
195#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
196#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
197#define ATTR_MILLI .attr code,read,execute
198#define SUBSPA_DATA .subspa .data
199#define ATTR_DATA .attr init_data,read,write
200#define GLOBAL _gp
201#else
202#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
203#define SUBSPA_MILLI_DIV SUBSPA_MILLI
204#define SUBSPA_MILLI_MUL SUBSPA_MILLI
205#define ATTR_MILLI
206#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
207#define ATTR_DATA
208#define GLOBAL $global$
209#endif
210#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16
211
212#define GSYM(sym) !sym
213#define LSYM(sym) !CAT(L$,sym)
214#define LREF(sym) CAT(L$,sym)
215#endif
216
fe660a1a
JDA
217#ifdef L_dyncall
218 SUBSPA_MILLI
219 ATTR_DATA
220GSYM($$dyncall)
221 .export $$dyncall,millicode
222 .proc
223 .callinfo millicode
224 .entry
225 bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
226 depi 0,31,2,%r22 ; clear the two least significant bits
227 ldw 4(%r22),%r19 ; load new LTP value
228 ldw 0(%r22),%r22 ; load address of target
229LSYM(1)
230#ifdef LINUX
231 bv %r0(%r22) ; branch to the real target
232#else
233 ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22
234 mtsp %r1,%sr0 ; move that space identifier into sr0
235 be 0(%sr0,%r22) ; branch to the real target
236#endif
237 stw %r2,-24(%r30) ; save return address into frame marker
238 .exit
239 .procend
240#endif
48bd7758
AM
241
242#ifdef L_divI
243/* ROUTINES: $$divI, $$divoI
244
245 Single precision divide for signed binary integers.
246
247 The quotient is truncated towards zero.
248 The sign of the quotient is the XOR of the signs of the dividend and
249 divisor.
250 Divide by zero is trapped.
251 Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
252
253 INPUT REGISTERS:
254 . arg0 == dividend
255 . arg1 == divisor
256 . mrp == return pc
257 . sr0 == return space when called externally
258
259 OUTPUT REGISTERS:
260 . arg0 = undefined
261 . arg1 = undefined
262 . ret1 = quotient
263
264 OTHER REGISTERS AFFECTED:
265 . r1 = undefined
266
267 SIDE EFFECTS:
268 . Causes a trap under the following conditions:
269 . divisor is zero (traps with ADDIT,= 0,25,0)
270 . dividend==-2**31 and divisor==-1 and routine is $$divoI
271 . (traps with ADDO 26,25,0)
272 . Changes memory at the following places:
273 . NONE
274
275 PERMISSIBLE CONTEXT:
276 . Unwindable.
277 . Suitable for internal or external millicode.
278 . Assumes the special millicode register conventions.
279
280 DISCUSSION:
281 . Branchs to other millicode routines using BE
282 . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
283 .
284 . For selected divisors, calls a divide by constant routine written by
285 . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
286 .
287 . The only overflow case is -2**31 divided by -1.
288 . Both routines return -2**31 but only $$divoI traps. */
289
290RDEFINE(temp,r1)
291RDEFINE(retreg,ret1) /* r29 */
292RDEFINE(temp1,arg0)
293 SUBSPA_MILLI_DIV
294 ATTR_MILLI
295 .import $$divI_2,millicode
296 .import $$divI_3,millicode
297 .import $$divI_4,millicode
298 .import $$divI_5,millicode
299 .import $$divI_6,millicode
300 .import $$divI_7,millicode
301 .import $$divI_8,millicode
302 .import $$divI_9,millicode
303 .import $$divI_10,millicode
304 .import $$divI_12,millicode
305 .import $$divI_14,millicode
306 .import $$divI_15,millicode
307 .export $$divI,millicode
308 .export $$divoI,millicode
309 .proc
310 .callinfo millicode
311 .entry
312GSYM($$divoI)
313 comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
314GSYM($$divI)
315 ldo -1(arg1),temp /* is there at most one bit set ? */
316 and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
317 addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
318 b,n LREF(neg_denom)
319LSYM(pow2)
320 addi,>= 0,arg0,retreg /* if numerator is negative, add the */
321 add arg0,temp,retreg /* (denominaotr -1) to correct for shifts */
322 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
323 extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
324 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
325 ldi 0xcc,temp1 /* setup 0xcc in temp1 */
326 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
327 extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
328 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
329 ldi 0xaa,temp /* setup 0xaa in temp */
330 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
331 extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
332 and,= arg1,temp1,r0 /* test denominator with 0xcc */
333 extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
334 and,= arg1,temp,r0 /* test denominator with 0xaa */
335 extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
336 MILLIRETN
337LSYM(neg_denom)
338 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
339 b,n LREF(regular_seq)
340 sub r0,arg1,temp /* make denominator positive */
341 comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
342 ldo -1(temp),retreg /* is there at most one bit set ? */
343 and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
344 b,n LREF(regular_seq)
345 sub r0,arg0,retreg /* negate numerator */
346 comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
347 copy retreg,arg0 /* set up arg0, arg1 and temp */
348 copy temp,arg1 /* before branching to pow2 */
349 b LREF(pow2)
350 ldo -1(arg1),temp
351LSYM(regular_seq)
352 comib,>>=,n 15,arg1,LREF(small_divisor)
353 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
354LSYM(normal)
355 subi 0,retreg,retreg /* make it positive */
356 sub 0,arg1,temp /* clear carry, */
357 /* negate the divisor */
358 ds 0,temp,0 /* set V-bit to the comple- */
359 /* ment of the divisor sign */
360 add retreg,retreg,retreg /* shift msb bit into carry */
361 ds r0,arg1,temp /* 1st divide step, if no carry */
362 addc retreg,retreg,retreg /* shift retreg with/into carry */
363 ds temp,arg1,temp /* 2nd divide step */
364 addc retreg,retreg,retreg /* shift retreg with/into carry */
365 ds temp,arg1,temp /* 3rd divide step */
366 addc retreg,retreg,retreg /* shift retreg with/into carry */
367 ds temp,arg1,temp /* 4th divide step */
368 addc retreg,retreg,retreg /* shift retreg with/into carry */
369 ds temp,arg1,temp /* 5th divide step */
370 addc retreg,retreg,retreg /* shift retreg with/into carry */
371 ds temp,arg1,temp /* 6th divide step */
372 addc retreg,retreg,retreg /* shift retreg with/into carry */
373 ds temp,arg1,temp /* 7th divide step */
374 addc retreg,retreg,retreg /* shift retreg with/into carry */
375 ds temp,arg1,temp /* 8th divide step */
376 addc retreg,retreg,retreg /* shift retreg with/into carry */
377 ds temp,arg1,temp /* 9th divide step */
378 addc retreg,retreg,retreg /* shift retreg with/into carry */
379 ds temp,arg1,temp /* 10th divide step */
380 addc retreg,retreg,retreg /* shift retreg with/into carry */
381 ds temp,arg1,temp /* 11th divide step */
382 addc retreg,retreg,retreg /* shift retreg with/into carry */
383 ds temp,arg1,temp /* 12th divide step */
384 addc retreg,retreg,retreg /* shift retreg with/into carry */
385 ds temp,arg1,temp /* 13th divide step */
386 addc retreg,retreg,retreg /* shift retreg with/into carry */
387 ds temp,arg1,temp /* 14th divide step */
388 addc retreg,retreg,retreg /* shift retreg with/into carry */
389 ds temp,arg1,temp /* 15th divide step */
390 addc retreg,retreg,retreg /* shift retreg with/into carry */
391 ds temp,arg1,temp /* 16th divide step */
392 addc retreg,retreg,retreg /* shift retreg with/into carry */
393 ds temp,arg1,temp /* 17th divide step */
394 addc retreg,retreg,retreg /* shift retreg with/into carry */
395 ds temp,arg1,temp /* 18th divide step */
396 addc retreg,retreg,retreg /* shift retreg with/into carry */
397 ds temp,arg1,temp /* 19th divide step */
398 addc retreg,retreg,retreg /* shift retreg with/into carry */
399 ds temp,arg1,temp /* 20th divide step */
400 addc retreg,retreg,retreg /* shift retreg with/into carry */
401 ds temp,arg1,temp /* 21st divide step */
402 addc retreg,retreg,retreg /* shift retreg with/into carry */
403 ds temp,arg1,temp /* 22nd divide step */
404 addc retreg,retreg,retreg /* shift retreg with/into carry */
405 ds temp,arg1,temp /* 23rd divide step */
406 addc retreg,retreg,retreg /* shift retreg with/into carry */
407 ds temp,arg1,temp /* 24th divide step */
408 addc retreg,retreg,retreg /* shift retreg with/into carry */
409 ds temp,arg1,temp /* 25th divide step */
410 addc retreg,retreg,retreg /* shift retreg with/into carry */
411 ds temp,arg1,temp /* 26th divide step */
412 addc retreg,retreg,retreg /* shift retreg with/into carry */
413 ds temp,arg1,temp /* 27th divide step */
414 addc retreg,retreg,retreg /* shift retreg with/into carry */
415 ds temp,arg1,temp /* 28th divide step */
416 addc retreg,retreg,retreg /* shift retreg with/into carry */
417 ds temp,arg1,temp /* 29th divide step */
418 addc retreg,retreg,retreg /* shift retreg with/into carry */
419 ds temp,arg1,temp /* 30th divide step */
420 addc retreg,retreg,retreg /* shift retreg with/into carry */
421 ds temp,arg1,temp /* 31st divide step */
422 addc retreg,retreg,retreg /* shift retreg with/into carry */
423 ds temp,arg1,temp /* 32nd divide step, */
424 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
425 xor,>= arg0,arg1,0 /* get correct sign of quotient */
426 sub 0,retreg,retreg /* based on operand signs */
427 MILLIRETN
428 nop
429
430LSYM(small_divisor)
431
432#if defined(pa64)
433/* Clear the upper 32 bits of the arg1 register. We are working with */
a7b376ee 434/* small divisors (and 32-bit integers) We must not be mislead */
fe19a83d 435/* by "1" bits left in the upper 32 bits. */
9db5bf71 436 depd %r0,31,32,%r25
48bd7758
AM
437#endif
438 blr,n arg1,r0
439 nop
440/* table for divisor == 0,1, ... ,15 */
441 addit,= 0,arg1,r0 /* trap if divisor == 0 */
442 nop
443 MILLIRET /* divisor == 1 */
444 copy arg0,retreg
445 MILLI_BEN($$divI_2) /* divisor == 2 */
446 nop
447 MILLI_BEN($$divI_3) /* divisor == 3 */
448 nop
449 MILLI_BEN($$divI_4) /* divisor == 4 */
450 nop
451 MILLI_BEN($$divI_5) /* divisor == 5 */
452 nop
453 MILLI_BEN($$divI_6) /* divisor == 6 */
454 nop
455 MILLI_BEN($$divI_7) /* divisor == 7 */
456 nop
457 MILLI_BEN($$divI_8) /* divisor == 8 */
458 nop
459 MILLI_BEN($$divI_9) /* divisor == 9 */
460 nop
461 MILLI_BEN($$divI_10) /* divisor == 10 */
462 nop
463 b LREF(normal) /* divisor == 11 */
464 add,>= 0,arg0,retreg
465 MILLI_BEN($$divI_12) /* divisor == 12 */
466 nop
467 b LREF(normal) /* divisor == 13 */
468 add,>= 0,arg0,retreg
469 MILLI_BEN($$divI_14) /* divisor == 14 */
470 nop
471 MILLI_BEN($$divI_15) /* divisor == 15 */
472 nop
473
474LSYM(negative1)
475 sub 0,arg0,retreg /* result is negation of dividend */
476 MILLIRET
477 addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
478 .exit
479 .procend
480 .end
481#endif
482
483#ifdef L_divU
484/* ROUTINE: $$divU
485 .
486 . Single precision divide for unsigned integers.
487 .
488 . Quotient is truncated towards zero.
489 . Traps on divide by zero.
490
491 INPUT REGISTERS:
492 . arg0 == dividend
493 . arg1 == divisor
494 . mrp == return pc
495 . sr0 == return space when called externally
496
497 OUTPUT REGISTERS:
498 . arg0 = undefined
499 . arg1 = undefined
500 . ret1 = quotient
501
502 OTHER REGISTERS AFFECTED:
503 . r1 = undefined
504
505 SIDE EFFECTS:
506 . Causes a trap under the following conditions:
507 . divisor is zero
508 . Changes memory at the following places:
509 . NONE
510
511 PERMISSIBLE CONTEXT:
512 . Unwindable.
513 . Does not create a stack frame.
514 . Suitable for internal or external millicode.
515 . Assumes the special millicode register conventions.
516
517 DISCUSSION:
518 . Branchs to other millicode routines using BE:
519 . $$divU_# for 3,5,6,7,9,10,12,14,15
520 .
521 . For selected small divisors calls the special divide by constant
522 . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
523
524RDEFINE(temp,r1)
525RDEFINE(retreg,ret1) /* r29 */
526RDEFINE(temp1,arg0)
527 SUBSPA_MILLI_DIV
528 ATTR_MILLI
529 .export $$divU,millicode
530 .import $$divU_3,millicode
531 .import $$divU_5,millicode
532 .import $$divU_6,millicode
533 .import $$divU_7,millicode
534 .import $$divU_9,millicode
535 .import $$divU_10,millicode
536 .import $$divU_12,millicode
537 .import $$divU_14,millicode
538 .import $$divU_15,millicode
539 .proc
540 .callinfo millicode
541 .entry
542GSYM($$divU)
543/* The subtract is not nullified since it does no harm and can be used
544 by the two cases that branch back to "normal". */
545 ldo -1(arg1),temp /* is there at most one bit set ? */
546 and,= arg1,temp,r0 /* if so, denominator is power of 2 */
547 b LREF(regular_seq)
548 addit,= 0,arg1,0 /* trap for zero dvr */
549 copy arg0,retreg
550 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
551 extru retreg,15,16,retreg /* retreg = retreg >> 16 */
552 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
553 ldi 0xcc,temp1 /* setup 0xcc in temp1 */
554 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
555 extru retreg,23,24,retreg /* retreg = retreg >> 8 */
556 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
557 ldi 0xaa,temp /* setup 0xaa in temp */
558 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
559 extru retreg,27,28,retreg /* retreg = retreg >> 4 */
560 and,= arg1,temp1,r0 /* test denominator with 0xcc */
561 extru retreg,29,30,retreg /* retreg = retreg >> 2 */
562 and,= arg1,temp,r0 /* test denominator with 0xaa */
563 extru retreg,30,31,retreg /* retreg = retreg >> 1 */
564 MILLIRETN
565 nop
566LSYM(regular_seq)
567 comib,>= 15,arg1,LREF(special_divisor)
568 subi 0,arg1,temp /* clear carry, negate the divisor */
569 ds r0,temp,r0 /* set V-bit to 1 */
570LSYM(normal)
571 add arg0,arg0,retreg /* shift msb bit into carry */
572 ds r0,arg1,temp /* 1st divide step, if no carry */
573 addc retreg,retreg,retreg /* shift retreg with/into carry */
574 ds temp,arg1,temp /* 2nd divide step */
575 addc retreg,retreg,retreg /* shift retreg with/into carry */
576 ds temp,arg1,temp /* 3rd divide step */
577 addc retreg,retreg,retreg /* shift retreg with/into carry */
578 ds temp,arg1,temp /* 4th divide step */
579 addc retreg,retreg,retreg /* shift retreg with/into carry */
580 ds temp,arg1,temp /* 5th divide step */
581 addc retreg,retreg,retreg /* shift retreg with/into carry */
582 ds temp,arg1,temp /* 6th divide step */
583 addc retreg,retreg,retreg /* shift retreg with/into carry */
584 ds temp,arg1,temp /* 7th divide step */
585 addc retreg,retreg,retreg /* shift retreg with/into carry */
586 ds temp,arg1,temp /* 8th divide step */
587 addc retreg,retreg,retreg /* shift retreg with/into carry */
588 ds temp,arg1,temp /* 9th divide step */
589 addc retreg,retreg,retreg /* shift retreg with/into carry */
590 ds temp,arg1,temp /* 10th divide step */
591 addc retreg,retreg,retreg /* shift retreg with/into carry */
592 ds temp,arg1,temp /* 11th divide step */
593 addc retreg,retreg,retreg /* shift retreg with/into carry */
594 ds temp,arg1,temp /* 12th divide step */
595 addc retreg,retreg,retreg /* shift retreg with/into carry */
596 ds temp,arg1,temp /* 13th divide step */
597 addc retreg,retreg,retreg /* shift retreg with/into carry */
598 ds temp,arg1,temp /* 14th divide step */
599 addc retreg,retreg,retreg /* shift retreg with/into carry */
600 ds temp,arg1,temp /* 15th divide step */
601 addc retreg,retreg,retreg /* shift retreg with/into carry */
602 ds temp,arg1,temp /* 16th divide step */
603 addc retreg,retreg,retreg /* shift retreg with/into carry */
604 ds temp,arg1,temp /* 17th divide step */
605 addc retreg,retreg,retreg /* shift retreg with/into carry */
606 ds temp,arg1,temp /* 18th divide step */
607 addc retreg,retreg,retreg /* shift retreg with/into carry */
608 ds temp,arg1,temp /* 19th divide step */
609 addc retreg,retreg,retreg /* shift retreg with/into carry */
610 ds temp,arg1,temp /* 20th divide step */
611 addc retreg,retreg,retreg /* shift retreg with/into carry */
612 ds temp,arg1,temp /* 21st divide step */
613 addc retreg,retreg,retreg /* shift retreg with/into carry */
614 ds temp,arg1,temp /* 22nd divide step */
615 addc retreg,retreg,retreg /* shift retreg with/into carry */
616 ds temp,arg1,temp /* 23rd divide step */
617 addc retreg,retreg,retreg /* shift retreg with/into carry */
618 ds temp,arg1,temp /* 24th divide step */
619 addc retreg,retreg,retreg /* shift retreg with/into carry */
620 ds temp,arg1,temp /* 25th divide step */
621 addc retreg,retreg,retreg /* shift retreg with/into carry */
622 ds temp,arg1,temp /* 26th divide step */
623 addc retreg,retreg,retreg /* shift retreg with/into carry */
624 ds temp,arg1,temp /* 27th divide step */
625 addc retreg,retreg,retreg /* shift retreg with/into carry */
626 ds temp,arg1,temp /* 28th divide step */
627 addc retreg,retreg,retreg /* shift retreg with/into carry */
628 ds temp,arg1,temp /* 29th divide step */
629 addc retreg,retreg,retreg /* shift retreg with/into carry */
630 ds temp,arg1,temp /* 30th divide step */
631 addc retreg,retreg,retreg /* shift retreg with/into carry */
632 ds temp,arg1,temp /* 31st divide step */
633 addc retreg,retreg,retreg /* shift retreg with/into carry */
634 ds temp,arg1,temp /* 32nd divide step, */
635 MILLIRET
636 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
637
638/* Handle the cases where divisor is a small constant or has high bit on. */
639LSYM(special_divisor)
640/* blr arg1,r0 */
641/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
642
643/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
644 generating such a blr, comib sequence. A problem in nullification. So I
645 rewrote this code. */
646
647#if defined(pa64)
648/* Clear the upper 32 bits of the arg1 register. We are working with
a7b376ee 649 small divisors (and 32-bit unsigned integers) We must not be mislead
48bd7758 650 by "1" bits left in the upper 32 bits. */
9db5bf71 651 depd %r0,31,32,%r25
48bd7758
AM
652#endif
653 comib,> 0,arg1,LREF(big_divisor)
654 nop
655 blr arg1,r0
656 nop
657
658LSYM(zero_divisor) /* this label is here to provide external visibility */
659 addit,= 0,arg1,0 /* trap for zero dvr */
660 nop
661 MILLIRET /* divisor == 1 */
662 copy arg0,retreg
663 MILLIRET /* divisor == 2 */
664 extru arg0,30,31,retreg
665 MILLI_BEN($$divU_3) /* divisor == 3 */
666 nop
667 MILLIRET /* divisor == 4 */
668 extru arg0,29,30,retreg
669 MILLI_BEN($$divU_5) /* divisor == 5 */
670 nop
671 MILLI_BEN($$divU_6) /* divisor == 6 */
672 nop
673 MILLI_BEN($$divU_7) /* divisor == 7 */
674 nop
675 MILLIRET /* divisor == 8 */
676 extru arg0,28,29,retreg
677 MILLI_BEN($$divU_9) /* divisor == 9 */
678 nop
679 MILLI_BEN($$divU_10) /* divisor == 10 */
680 nop
681 b LREF(normal) /* divisor == 11 */
682 ds r0,temp,r0 /* set V-bit to 1 */
683 MILLI_BEN($$divU_12) /* divisor == 12 */
684 nop
685 b LREF(normal) /* divisor == 13 */
686 ds r0,temp,r0 /* set V-bit to 1 */
687 MILLI_BEN($$divU_14) /* divisor == 14 */
688 nop
689 MILLI_BEN($$divU_15) /* divisor == 15 */
690 nop
691
692/* Handle the case where the high bit is on in the divisor.
693 Compute: if( dividend>=divisor) quotient=1; else quotient=0;
694 Note: dividend>==divisor iff dividend-divisor does not borrow
695 and not borrow iff carry. */
696LSYM(big_divisor)
697 sub arg0,arg1,r0
698 MILLIRET
699 addc r0,r0,retreg
700 .exit
701 .procend
702 .end
703#endif
704
705#ifdef L_remI
706/* ROUTINE: $$remI
707
708 DESCRIPTION:
709 . $$remI returns the remainder of the division of two signed 32-bit
710 . integers. The sign of the remainder is the same as the sign of
711 . the dividend.
712
713
714 INPUT REGISTERS:
715 . arg0 == dividend
716 . arg1 == divisor
717 . mrp == return pc
718 . sr0 == return space when called externally
719
720 OUTPUT REGISTERS:
721 . arg0 = destroyed
722 . arg1 = destroyed
723 . ret1 = remainder
724
725 OTHER REGISTERS AFFECTED:
726 . r1 = undefined
727
728 SIDE EFFECTS:
729 . Causes a trap under the following conditions: DIVIDE BY ZERO
730 . Changes memory at the following places: NONE
731
732 PERMISSIBLE CONTEXT:
733 . Unwindable
734 . Does not create a stack frame
735 . Is usable for internal or external microcode
736
737 DISCUSSION:
738 . Calls other millicode routines via mrp: NONE
739 . Calls other millicode routines: NONE */
740
741RDEFINE(tmp,r1)
742RDEFINE(retreg,ret1)
743
744 SUBSPA_MILLI
745 ATTR_MILLI
746 .proc
747 .callinfo millicode
748 .entry
749GSYM($$remI)
750GSYM($$remoI)
751 .export $$remI,MILLICODE
752 .export $$remoI,MILLICODE
753 ldo -1(arg1),tmp /* is there at most one bit set ? */
754 and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
755 addi,> 0,arg1,r0 /* if denominator > 0, use power */
756 /* of 2 */
757 b,n LREF(neg_denom)
758LSYM(pow2)
759 comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
760 and arg0,tmp,retreg /* get the result */
761 MILLIRETN
762LSYM(neg_num)
763 subi 0,arg0,arg0 /* negate numerator */
764 and arg0,tmp,retreg /* get the result */
765 subi 0,retreg,retreg /* negate result */
766 MILLIRETN
767LSYM(neg_denom)
768 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
769 /* of 2 */
770 b,n LREF(regular_seq)
771 sub r0,arg1,tmp /* make denominator positive */
772 comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
773 ldo -1(tmp),retreg /* is there at most one bit set ? */
774 and,= tmp,retreg,r0 /* if not, go to regular_seq */
775 b,n LREF(regular_seq)
776 comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
777 and arg0,retreg,retreg
778 MILLIRETN
779LSYM(neg_num_2)
780 subi 0,arg0,tmp /* test against 0x80000000 */
781 and tmp,retreg,retreg
782 subi 0,retreg,retreg
783 MILLIRETN
784LSYM(regular_seq)
785 addit,= 0,arg1,0 /* trap if div by zero */
786 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
787 sub 0,retreg,retreg /* make it positive */
788 sub 0,arg1, tmp /* clear carry, */
789 /* negate the divisor */
790 ds 0, tmp,0 /* set V-bit to the comple- */
791 /* ment of the divisor sign */
792 or 0,0, tmp /* clear tmp */
793 add retreg,retreg,retreg /* shift msb bit into carry */
794 ds tmp,arg1, tmp /* 1st divide step, if no carry */
795 /* out, msb of quotient = 0 */
796 addc retreg,retreg,retreg /* shift retreg with/into carry */
797LSYM(t1)
798 ds tmp,arg1, tmp /* 2nd divide step */
799 addc retreg,retreg,retreg /* shift retreg with/into carry */
800 ds tmp,arg1, tmp /* 3rd divide step */
801 addc retreg,retreg,retreg /* shift retreg with/into carry */
802 ds tmp,arg1, tmp /* 4th divide step */
803 addc retreg,retreg,retreg /* shift retreg with/into carry */
804 ds tmp,arg1, tmp /* 5th divide step */
805 addc retreg,retreg,retreg /* shift retreg with/into carry */
806 ds tmp,arg1, tmp /* 6th divide step */
807 addc retreg,retreg,retreg /* shift retreg with/into carry */
808 ds tmp,arg1, tmp /* 7th divide step */
809 addc retreg,retreg,retreg /* shift retreg with/into carry */
810 ds tmp,arg1, tmp /* 8th divide step */
811 addc retreg,retreg,retreg /* shift retreg with/into carry */
812 ds tmp,arg1, tmp /* 9th divide step */
813 addc retreg,retreg,retreg /* shift retreg with/into carry */
814 ds tmp,arg1, tmp /* 10th divide step */
815 addc retreg,retreg,retreg /* shift retreg with/into carry */
816 ds tmp,arg1, tmp /* 11th divide step */
817 addc retreg,retreg,retreg /* shift retreg with/into carry */
818 ds tmp,arg1, tmp /* 12th divide step */
819 addc retreg,retreg,retreg /* shift retreg with/into carry */
820 ds tmp,arg1, tmp /* 13th divide step */
821 addc retreg,retreg,retreg /* shift retreg with/into carry */
822 ds tmp,arg1, tmp /* 14th divide step */
823 addc retreg,retreg,retreg /* shift retreg with/into carry */
824 ds tmp,arg1, tmp /* 15th divide step */
825 addc retreg,retreg,retreg /* shift retreg with/into carry */
826 ds tmp,arg1, tmp /* 16th divide step */
827 addc retreg,retreg,retreg /* shift retreg with/into carry */
828 ds tmp,arg1, tmp /* 17th divide step */
829 addc retreg,retreg,retreg /* shift retreg with/into carry */
830 ds tmp,arg1, tmp /* 18th divide step */
831 addc retreg,retreg,retreg /* shift retreg with/into carry */
832 ds tmp,arg1, tmp /* 19th divide step */
833 addc retreg,retreg,retreg /* shift retreg with/into carry */
834 ds tmp,arg1, tmp /* 20th divide step */
835 addc retreg,retreg,retreg /* shift retreg with/into carry */
836 ds tmp,arg1, tmp /* 21st divide step */
837 addc retreg,retreg,retreg /* shift retreg with/into carry */
838 ds tmp,arg1, tmp /* 22nd divide step */
839 addc retreg,retreg,retreg /* shift retreg with/into carry */
840 ds tmp,arg1, tmp /* 23rd divide step */
841 addc retreg,retreg,retreg /* shift retreg with/into carry */
842 ds tmp,arg1, tmp /* 24th divide step */
843 addc retreg,retreg,retreg /* shift retreg with/into carry */
844 ds tmp,arg1, tmp /* 25th divide step */
845 addc retreg,retreg,retreg /* shift retreg with/into carry */
846 ds tmp,arg1, tmp /* 26th divide step */
847 addc retreg,retreg,retreg /* shift retreg with/into carry */
848 ds tmp,arg1, tmp /* 27th divide step */
849 addc retreg,retreg,retreg /* shift retreg with/into carry */
850 ds tmp,arg1, tmp /* 28th divide step */
851 addc retreg,retreg,retreg /* shift retreg with/into carry */
852 ds tmp,arg1, tmp /* 29th divide step */
853 addc retreg,retreg,retreg /* shift retreg with/into carry */
854 ds tmp,arg1, tmp /* 30th divide step */
855 addc retreg,retreg,retreg /* shift retreg with/into carry */
856 ds tmp,arg1, tmp /* 31st divide step */
857 addc retreg,retreg,retreg /* shift retreg with/into carry */
858 ds tmp,arg1, tmp /* 32nd divide step, */
859 addc retreg,retreg,retreg /* shift last bit into retreg */
860 movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
861 add,< arg1,0,0 /* if arg1 > 0, add arg1 */
862 add,tr tmp,arg1,retreg /* for correcting remainder tmp */
863 sub tmp,arg1,retreg /* else add absolute value arg1 */
864LSYM(finish)
865 add,>= arg0,0,0 /* set sign of remainder */
866 sub 0,retreg,retreg /* to sign of dividend */
867 MILLIRET
868 nop
869 .exit
870 .procend
871#ifdef milliext
872 .origin 0x00000200
873#endif
874 .end
875#endif
876
877#ifdef L_remU
878/* ROUTINE: $$remU
879 . Single precision divide for remainder with unsigned binary integers.
880 .
881 . The remainder must be dividend-(dividend/divisor)*divisor.
882 . Divide by zero is trapped.
883
884 INPUT REGISTERS:
885 . arg0 == dividend
886 . arg1 == divisor
887 . mrp == return pc
888 . sr0 == return space when called externally
889
890 OUTPUT REGISTERS:
891 . arg0 = undefined
892 . arg1 = undefined
893 . ret1 = remainder
894
895 OTHER REGISTERS AFFECTED:
896 . r1 = undefined
897
898 SIDE EFFECTS:
899 . Causes a trap under the following conditions: DIVIDE BY ZERO
900 . Changes memory at the following places: NONE
901
902 PERMISSIBLE CONTEXT:
903 . Unwindable.
904 . Does not create a stack frame.
905 . Suitable for internal or external millicode.
906 . Assumes the special millicode register conventions.
907
908 DISCUSSION:
909 . Calls other millicode routines using mrp: NONE
910 . Calls other millicode routines: NONE */
911
912
913RDEFINE(temp,r1)
914RDEFINE(rmndr,ret1) /* r29 */
915 SUBSPA_MILLI
916 ATTR_MILLI
917 .export $$remU,millicode
918 .proc
919 .callinfo millicode
920 .entry
921GSYM($$remU)
922 ldo -1(arg1),temp /* is there at most one bit set ? */
923 and,= arg1,temp,r0 /* if not, don't use power of 2 */
924 b LREF(regular_seq)
925 addit,= 0,arg1,r0 /* trap on div by zero */
926 and arg0,temp,rmndr /* get the result for power of 2 */
927 MILLIRETN
928LSYM(regular_seq)
929 comib,>=,n 0,arg1,LREF(special_case)
930 subi 0,arg1,rmndr /* clear carry, negate the divisor */
931 ds r0,rmndr,r0 /* set V-bit to 1 */
932 add arg0,arg0,temp /* shift msb bit into carry */
933 ds r0,arg1,rmndr /* 1st divide step, if no carry */
934 addc temp,temp,temp /* shift temp with/into carry */
935 ds rmndr,arg1,rmndr /* 2nd divide step */
936 addc temp,temp,temp /* shift temp with/into carry */
937 ds rmndr,arg1,rmndr /* 3rd divide step */
938 addc temp,temp,temp /* shift temp with/into carry */
939 ds rmndr,arg1,rmndr /* 4th divide step */
940 addc temp,temp,temp /* shift temp with/into carry */
941 ds rmndr,arg1,rmndr /* 5th divide step */
942 addc temp,temp,temp /* shift temp with/into carry */
943 ds rmndr,arg1,rmndr /* 6th divide step */
944 addc temp,temp,temp /* shift temp with/into carry */
945 ds rmndr,arg1,rmndr /* 7th divide step */
946 addc temp,temp,temp /* shift temp with/into carry */
947 ds rmndr,arg1,rmndr /* 8th divide step */
948 addc temp,temp,temp /* shift temp with/into carry */
949 ds rmndr,arg1,rmndr /* 9th divide step */
950 addc temp,temp,temp /* shift temp with/into carry */
951 ds rmndr,arg1,rmndr /* 10th divide step */
952 addc temp,temp,temp /* shift temp with/into carry */
953 ds rmndr,arg1,rmndr /* 11th divide step */
954 addc temp,temp,temp /* shift temp with/into carry */
955 ds rmndr,arg1,rmndr /* 12th divide step */
956 addc temp,temp,temp /* shift temp with/into carry */
957 ds rmndr,arg1,rmndr /* 13th divide step */
958 addc temp,temp,temp /* shift temp with/into carry */
959 ds rmndr,arg1,rmndr /* 14th divide step */
960 addc temp,temp,temp /* shift temp with/into carry */
961 ds rmndr,arg1,rmndr /* 15th divide step */
962 addc temp,temp,temp /* shift temp with/into carry */
963 ds rmndr,arg1,rmndr /* 16th divide step */
964 addc temp,temp,temp /* shift temp with/into carry */
965 ds rmndr,arg1,rmndr /* 17th divide step */
966 addc temp,temp,temp /* shift temp with/into carry */
967 ds rmndr,arg1,rmndr /* 18th divide step */
968 addc temp,temp,temp /* shift temp with/into carry */
969 ds rmndr,arg1,rmndr /* 19th divide step */
970 addc temp,temp,temp /* shift temp with/into carry */
971 ds rmndr,arg1,rmndr /* 20th divide step */
972 addc temp,temp,temp /* shift temp with/into carry */
973 ds rmndr,arg1,rmndr /* 21st divide step */
974 addc temp,temp,temp /* shift temp with/into carry */
975 ds rmndr,arg1,rmndr /* 22nd divide step */
976 addc temp,temp,temp /* shift temp with/into carry */
977 ds rmndr,arg1,rmndr /* 23rd divide step */
978 addc temp,temp,temp /* shift temp with/into carry */
979 ds rmndr,arg1,rmndr /* 24th divide step */
980 addc temp,temp,temp /* shift temp with/into carry */
981 ds rmndr,arg1,rmndr /* 25th divide step */
982 addc temp,temp,temp /* shift temp with/into carry */
983 ds rmndr,arg1,rmndr /* 26th divide step */
984 addc temp,temp,temp /* shift temp with/into carry */
985 ds rmndr,arg1,rmndr /* 27th divide step */
986 addc temp,temp,temp /* shift temp with/into carry */
987 ds rmndr,arg1,rmndr /* 28th divide step */
988 addc temp,temp,temp /* shift temp with/into carry */
989 ds rmndr,arg1,rmndr /* 29th divide step */
990 addc temp,temp,temp /* shift temp with/into carry */
991 ds rmndr,arg1,rmndr /* 30th divide step */
992 addc temp,temp,temp /* shift temp with/into carry */
993 ds rmndr,arg1,rmndr /* 31st divide step */
994 addc temp,temp,temp /* shift temp with/into carry */
995 ds rmndr,arg1,rmndr /* 32nd divide step, */
996 comiclr,<= 0,rmndr,r0
997 add rmndr,arg1,rmndr /* correction */
998 MILLIRETN
999 nop
1000
1001/* Putting >= on the last DS and deleting COMICLR does not work! */
1002LSYM(special_case)
1003 sub,>>= arg0,arg1,rmndr
1004 copy arg0,rmndr
1005 MILLIRETN
1006 nop
1007 .exit
1008 .procend
1009 .end
1010#endif
1011
1012#ifdef L_div_const
1013/* ROUTINE: $$divI_2
1014 . $$divI_3 $$divU_3
1015 . $$divI_4
1016 . $$divI_5 $$divU_5
1017 . $$divI_6 $$divU_6
1018 . $$divI_7 $$divU_7
1019 . $$divI_8
1020 . $$divI_9 $$divU_9
1021 . $$divI_10 $$divU_10
1022 .
1023 . $$divI_12 $$divU_12
1024 .
1025 . $$divI_14 $$divU_14
1026 . $$divI_15 $$divU_15
1027 . $$divI_16
1028 . $$divI_17 $$divU_17
1029 .
1030 . Divide by selected constants for single precision binary integers.
1031
1032 INPUT REGISTERS:
1033 . arg0 == dividend
1034 . mrp == return pc
1035 . sr0 == return space when called externally
1036
1037 OUTPUT REGISTERS:
1038 . arg0 = undefined
1039 . arg1 = undefined
1040 . ret1 = quotient
1041
1042 OTHER REGISTERS AFFECTED:
1043 . r1 = undefined
1044
1045 SIDE EFFECTS:
1046 . Causes a trap under the following conditions: NONE
1047 . Changes memory at the following places: NONE
1048
1049 PERMISSIBLE CONTEXT:
1050 . Unwindable.
1051 . Does not create a stack frame.
1052 . Suitable for internal or external millicode.
1053 . Assumes the special millicode register conventions.
1054
1055 DISCUSSION:
1056 . Calls other millicode routines using mrp: NONE
1057 . Calls other millicode routines: NONE */
1058
1059
1060/* TRUNCATED DIVISION BY SMALL INTEGERS
1061
1062 We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
1063 (with y fixed).
1064
1065 Let a = floor(z/y), for some choice of z. Note that z will be
1066 chosen so that division by z is cheap.
1067
1068 Let r be the remainder(z/y). In other words, r = z - ay.
1069
1070 Now, our method is to choose a value for b such that
1071
1072 q'(x) = floor((ax+b)/z)
1073
1074 is equal to q(x) over as large a range of x as possible. If the
1075 two are equal over a sufficiently large range, and if it is easy to
1076 form the product (ax), and it is easy to divide by z, then we can
1077 perform the division much faster than the general division algorithm.
1078
1079 So, we want the following to be true:
1080
1081 . For x in the following range:
1082 .
1083 . ky <= x < (k+1)y
1084 .
1085 . implies that
1086 .
1087 . k <= (ax+b)/z < (k+1)
1088
1089 We want to determine b such that this is true for all k in the
1090 range {0..K} for some maximum K.
1091
1092 Since (ax+b) is an increasing function of x, we can take each
1093 bound separately to determine the "best" value for b.
1094
1095 (ax+b)/z < (k+1) implies
1096
1097 (a((k+1)y-1)+b < (k+1)z implies
1098
1099 b < a + (k+1)(z-ay) implies
1100
1101 b < a + (k+1)r
1102
1103 This needs to be true for all k in the range {0..K}. In
1104 particular, it is true for k = 0 and this leads to a maximum
1105 acceptable value for b.
1106
1107 b < a+r or b <= a+r-1
1108
1109 Taking the other bound, we have
1110
1111 k <= (ax+b)/z implies
1112
1113 k <= (aky+b)/z implies
1114
1115 k(z-ay) <= b implies
1116
1117 kr <= b
1118
1119 Clearly, the largest range for k will be achieved by maximizing b,
1120 when r is not zero. When r is zero, then the simplest choice for b
1121 is 0. When r is not 0, set
1122
1123 . b = a+r-1
1124
1125 Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
1126 for all x in the range:
1127
1128 . 0 <= x < (K+1)y
1129
1130 We need to determine what K is. Of our two bounds,
1131
1132 . b < a+(k+1)r is satisfied for all k >= 0, by construction.
1133
1134 The other bound is
1135
1136 . kr <= b
1137
1138 This is always true if r = 0. If r is not 0 (the usual case), then
1139 K = floor((a+r-1)/r), is the maximum value for k.
1140
1141 Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
1142 answer for q(x) = floor(x/y) when x is in the range
1143
1144 (0,(K+1)y-1) K = floor((a+r-1)/r)
1145
1146 To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
1147 the formula for q'(x) yields the correct value of q(x) for all x
1148 representable by a single word in HPPA.
1149
1150 We are also constrained in that computing the product (ax), adding
1151 b, and dividing by z must all be done quickly, otherwise we will be
1152 better off going through the general algorithm using the DS
1153 instruction, which uses approximately 70 cycles.
1154
1155 For each y, there is a choice of z which satisfies the constraints
1156 for (K+1)y >= 2**32. We may not, however, be able to satisfy the
1157 timing constraints for arbitrary y. It seems that z being equal to
1158 a power of 2 or a power of 2 minus 1 is as good as we can do, since
1159 it minimizes the time to do division by z. We want the choice of z
1160 to also result in a value for (a) that minimizes the computation of
1161 the product (ax). This is best achieved if (a) has a regular bit
1162 pattern (so the multiplication can be done with shifts and adds).
1163 The value of (a) also needs to be less than 2**32 so the product is
1164 always guaranteed to fit in 2 words.
1165
1166 In actual practice, the following should be done:
1167
1168 1) For negative x, you should take the absolute value and remember
1169 . the fact so that the result can be negated. This obviously does
1170 . not apply in the unsigned case.
1171 2) For even y, you should factor out the power of 2 that divides y
1172 . and divide x by it. You can then proceed by dividing by the
1173 . odd factor of y.
1174
1175 Here is a table of some odd values of y, and corresponding choices
1176 for z which are "good".
1177
1178 y z r a (hex) max x (hex)
1179
1180 3 2**32 1 55555555 100000001
1181 5 2**32 1 33333333 100000003
1182 7 2**24-1 0 249249 (infinite)
1183 9 2**24-1 0 1c71c7 (infinite)
1184 11 2**20-1 0 1745d (infinite)
1185 13 2**24-1 0 13b13b (infinite)
1186 15 2**32 1 11111111 10000000d
1187 17 2**32 1 f0f0f0f 10000000f
1188
1189 If r is 1, then b = a+r-1 = a. This simplifies the computation
1190 of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
1191 then b = 0 is ok to use which simplifies (ax+b).
1192
1193 The bit patterns for 55555555, 33333333, and 11111111 are obviously
1194 very regular. The bit patterns for the other values of a above are:
1195
1196 y (hex) (binary)
1197
1198 7 249249 001001001001001001001001 << regular >>
1199 9 1c71c7 000111000111000111000111 << regular >>
1200 11 1745d 000000010111010001011101 << irregular >>
1201 13 13b13b 000100111011000100111011 << irregular >>
1202
1203 The bit patterns for (a) corresponding to (y) of 11 and 13 may be
1204 too irregular to warrant using this method.
1205
1206 When z is a power of 2 minus 1, then the division by z is slightly
1207 more complicated, involving an iterative solution.
1208
1209 The code presented here solves division by 1 through 17, except for
1210 11 and 13. There are algorithms for both signed and unsigned
1211 quantities given.
1212
1213 TIMINGS (cycles)
1214
1215 divisor positive negative unsigned
1216
1217 . 1 2 2 2
1218 . 2 4 4 2
1219 . 3 19 21 19
1220 . 4 4 4 2
1221 . 5 18 22 19
1222 . 6 19 22 19
1223 . 8 4 4 2
1224 . 10 18 19 17
1225 . 12 18 20 18
1226 . 15 16 18 16
1227 . 16 4 4 2
1228 . 17 16 18 16
1229
1230 Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
1231 a loop body is executed until the tentative quotient is 0. The
1232 number of times the loop body is executed varies depending on the
1233 dividend, but is never more than two times. If the dividend is
1234 less than the divisor, then the loop body is not executed at all.
1235 Each iteration adds 4 cycles to the timings.
1236
1237 divisor positive negative unsigned
1238
1239 . 7 19+4n 20+4n 20+4n n = number of iterations
1240 . 9 21+4n 22+4n 21+4n
1241 . 14 21+4n 22+4n 20+4n
1242
1243 To give an idea of how the number of iterations varies, here is a
1244 table of dividend versus number of iterations when dividing by 7.
1245
1246 smallest largest required
1247 dividend dividend iterations
1248
1249 . 0 6 0
1250 . 7 0x6ffffff 1
1251 0x1000006 0xffffffff 2
1252
1253 There is some overlap in the range of numbers requiring 1 and 2
1254 iterations. */
1255
1256RDEFINE(t2,r1)
1257RDEFINE(x2,arg0) /* r26 */
1258RDEFINE(t1,arg1) /* r25 */
1259RDEFINE(x1,ret1) /* r29 */
1260
1261 SUBSPA_MILLI_DIV
1262 ATTR_MILLI
1263
1264 .proc
1265 .callinfo millicode
1266 .entry
1267/* NONE of these routines require a stack frame
1268 ALL of these routines are unwindable from millicode */
1269
1270GSYM($$divide_by_constant)
1271 .export $$divide_by_constant,millicode
1272/* Provides a "nice" label for the code covered by the unwind descriptor
1273 for things like gprof. */
1274
1275/* DIVISION BY 2 (shift by 1) */
1276GSYM($$divI_2)
1277 .export $$divI_2,millicode
1278 comclr,>= arg0,0,0
1279 addi 1,arg0,arg0
1280 MILLIRET
1281 extrs arg0,30,31,ret1
1282
1283
1284/* DIVISION BY 4 (shift by 2) */
1285GSYM($$divI_4)
1286 .export $$divI_4,millicode
1287 comclr,>= arg0,0,0
1288 addi 3,arg0,arg0
1289 MILLIRET
1290 extrs arg0,29,30,ret1
1291
1292
1293/* DIVISION BY 8 (shift by 3) */
1294GSYM($$divI_8)
1295 .export $$divI_8,millicode
1296 comclr,>= arg0,0,0
1297 addi 7,arg0,arg0
1298 MILLIRET
1299 extrs arg0,28,29,ret1
1300
1301/* DIVISION BY 16 (shift by 4) */
1302GSYM($$divI_16)
1303 .export $$divI_16,millicode
1304 comclr,>= arg0,0,0
1305 addi 15,arg0,arg0
1306 MILLIRET
1307 extrs arg0,27,28,ret1
1308
1309/****************************************************************************
1310*
1311* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
1312*
1313* includes 3,5,15,17 and also 6,10,12
1314*
1315****************************************************************************/
1316
1317/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
1318
1319GSYM($$divI_3)
1320 .export $$divI_3,millicode
1321 comb,<,N x2,0,LREF(neg3)
1322
1e5f1716 1323 addi 1,x2,x2 /* this cannot overflow */
48bd7758
AM
1324 extru x2,1,2,x1 /* multiply by 5 to get started */
1325 sh2add x2,x2,x2
1326 b LREF(pos)
1327 addc x1,0,x1
1328
1329LSYM(neg3)
1e5f1716 1330 subi 1,x2,x2 /* this cannot overflow */
48bd7758
AM
1331 extru x2,1,2,x1 /* multiply by 5 to get started */
1332 sh2add x2,x2,x2
1333 b LREF(neg)
1334 addc x1,0,x1
1335
1336GSYM($$divU_3)
1337 .export $$divU_3,millicode
1338 addi 1,x2,x2 /* this CAN overflow */
1339 addc 0,0,x1
1340 shd x1,x2,30,t1 /* multiply by 5 to get started */
1341 sh2add x2,x2,x2
1342 b LREF(pos)
1343 addc x1,t1,x1
1344
1345/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
1346
1347GSYM($$divI_5)
1348 .export $$divI_5,millicode
1349 comb,<,N x2,0,LREF(neg5)
1350
1e5f1716 1351 addi 3,x2,t1 /* this cannot overflow */
48bd7758
AM
1352 sh1add x2,t1,x2 /* multiply by 3 to get started */
1353 b LREF(pos)
1354 addc 0,0,x1
1355
1356LSYM(neg5)
1357 sub 0,x2,x2 /* negate x2 */
1e5f1716 1358 addi 1,x2,x2 /* this cannot overflow */
48bd7758
AM
1359 shd 0,x2,31,x1 /* get top bit (can be 1) */
1360 sh1add x2,x2,x2 /* multiply by 3 to get started */
1361 b LREF(neg)
1362 addc x1,0,x1
1363
1364GSYM($$divU_5)
1365 .export $$divU_5,millicode
1366 addi 1,x2,x2 /* this CAN overflow */
1367 addc 0,0,x1
1368 shd x1,x2,31,t1 /* multiply by 3 to get started */
1369 sh1add x2,x2,x2
1370 b LREF(pos)
1371 addc t1,x1,x1
1372
1373/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
1374GSYM($$divI_6)
1375 .export $$divI_6,millicode
1376 comb,<,N x2,0,LREF(neg6)
1377 extru x2,30,31,x2 /* divide by 2 */
1378 addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
1379 sh2add x2,t1,x2 /* multiply by 5 to get started */
1380 b LREF(pos)
1381 addc 0,0,x1
1382
1383LSYM(neg6)
1384 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1385 /* negation and adding 1 are done */
1386 /* at the same time by the SUBI */
1387 extru x2,30,31,x2
1388 shd 0,x2,30,x1
1389 sh2add x2,x2,x2 /* multiply by 5 to get started */
1390 b LREF(neg)
1391 addc x1,0,x1
1392
1393GSYM($$divU_6)
1394 .export $$divU_6,millicode
1395 extru x2,30,31,x2 /* divide by 2 */
1e5f1716 1396 addi 1,x2,x2 /* cannot carry */
48bd7758
AM
1397 shd 0,x2,30,x1 /* multiply by 5 to get started */
1398 sh2add x2,x2,x2
1399 b LREF(pos)
1400 addc x1,0,x1
1401
1402/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
1403GSYM($$divU_10)
1404 .export $$divU_10,millicode
1405 extru x2,30,31,x2 /* divide by 2 */
1406 addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
1407 sh1add x2,t1,x2 /* multiply by 3 to get started */
1408 addc 0,0,x1
1409LSYM(pos)
1410 shd x1,x2,28,t1 /* multiply by 0x11 */
1411 shd x2,0,28,t2
1412 add x2,t2,x2
1413 addc x1,t1,x1
1414LSYM(pos_for_17)
1415 shd x1,x2,24,t1 /* multiply by 0x101 */
1416 shd x2,0,24,t2
1417 add x2,t2,x2
1418 addc x1,t1,x1
1419
1420 shd x1,x2,16,t1 /* multiply by 0x10001 */
1421 shd x2,0,16,t2
1422 add x2,t2,x2
1423 MILLIRET
1424 addc x1,t1,x1
1425
1426GSYM($$divI_10)
1427 .export $$divI_10,millicode
1428 comb,< x2,0,LREF(neg10)
1429 copy 0,x1
1430 extru x2,30,31,x2 /* divide by 2 */
1e5f1716 1431 addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
48bd7758
AM
1432 sh1add x2,x2,x2 /* multiply by 3 to get started */
1433
1434LSYM(neg10)
1435 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1436 /* negation and adding 1 are done */
1437 /* at the same time by the SUBI */
1438 extru x2,30,31,x2
1439 sh1add x2,x2,x2 /* multiply by 3 to get started */
1440LSYM(neg)
1441 shd x1,x2,28,t1 /* multiply by 0x11 */
1442 shd x2,0,28,t2
1443 add x2,t2,x2
1444 addc x1,t1,x1
1445LSYM(neg_for_17)
1446 shd x1,x2,24,t1 /* multiply by 0x101 */
1447 shd x2,0,24,t2
1448 add x2,t2,x2
1449 addc x1,t1,x1
1450
1451 shd x1,x2,16,t1 /* multiply by 0x10001 */
1452 shd x2,0,16,t2
1453 add x2,t2,x2
1454 addc x1,t1,x1
1455 MILLIRET
1456 sub 0,x1,x1
1457
1458/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
1459GSYM($$divI_12)
1460 .export $$divI_12,millicode
1461 comb,< x2,0,LREF(neg12)
1462 copy 0,x1
1463 extru x2,29,30,x2 /* divide by 4 */
1464 addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
1465 sh2add x2,x2,x2 /* multiply by 5 to get started */
1466
1467LSYM(neg12)
1468 subi 4,x2,x2 /* negate, divide by 4, and add 1 */
1469 /* negation and adding 1 are done */
1470 /* at the same time by the SUBI */
1471 extru x2,29,30,x2
1472 b LREF(neg)
1473 sh2add x2,x2,x2 /* multiply by 5 to get started */
1474
1475GSYM($$divU_12)
1476 .export $$divU_12,millicode
1477 extru x2,29,30,x2 /* divide by 4 */
1e5f1716 1478 addi 5,x2,t1 /* cannot carry */
48bd7758
AM
1479 sh2add x2,t1,x2 /* multiply by 5 to get started */
1480 b LREF(pos)
1481 addc 0,0,x1
1482
1483/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
1484GSYM($$divI_15)
1485 .export $$divI_15,millicode
1486 comb,< x2,0,LREF(neg15)
1487 copy 0,x1
1488 addib,tr 1,x2,LREF(pos)+4
1489 shd x1,x2,28,t1
1490
1491LSYM(neg15)
1492 b LREF(neg)
1493 subi 1,x2,x2
1494
1495GSYM($$divU_15)
1496 .export $$divU_15,millicode
1497 addi 1,x2,x2 /* this CAN overflow */
1498 b LREF(pos)
1499 addc 0,0,x1
1500
1501/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
1502GSYM($$divI_17)
1503 .export $$divI_17,millicode
1504 comb,<,n x2,0,LREF(neg17)
1e5f1716 1505 addi 1,x2,x2 /* this cannot overflow */
48bd7758
AM
1506 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1507 shd x2,0,28,t2
1508 sub t2,x2,x2
1509 b LREF(pos_for_17)
1510 subb t1,0,x1
1511
1512LSYM(neg17)
1e5f1716 1513 subi 1,x2,x2 /* this cannot overflow */
48bd7758
AM
1514 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1515 shd x2,0,28,t2
1516 sub t2,x2,x2
1517 b LREF(neg_for_17)
1518 subb t1,0,x1
1519
1520GSYM($$divU_17)
1521 .export $$divU_17,millicode
1522 addi 1,x2,x2 /* this CAN overflow */
1523 addc 0,0,x1
1524 shd x1,x2,28,t1 /* multiply by 0xf to get started */
1525LSYM(u17)
1526 shd x2,0,28,t2
1527 sub t2,x2,x2
1528 b LREF(pos_for_17)
1529 subb t1,x1,x1
1530
1531
1532/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
1533 includes 7,9 and also 14
1534
1535
1536 z = 2**24-1
1537 r = z mod x = 0
1538
1539 so choose b = 0
1540
1541 Also, in order to divide by z = 2**24-1, we approximate by dividing
1542 by (z+1) = 2**24 (which is easy), and then correcting.
1543
1544 (ax) = (z+1)q' + r
1545 . = zq' + (q'+r)
1546
1547 So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
1548 Then the true remainder of (ax)/z is (q'+r). Repeat the process
1549 with this new remainder, adding the tentative quotients together,
1550 until a tentative quotient is 0 (and then we are done). There is
1551 one last correction to be done. It is possible that (q'+r) = z.
1552 If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
1553 in fact, we need to add 1 more to the quotient. Now, it turns
1554 out that this happens if and only if the original value x is
1555 an exact multiple of y. So, to avoid a three instruction test at
1556 the end, instead use 1 instruction to add 1 to x at the beginning. */
1557
1558/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
1559GSYM($$divI_7)
1560 .export $$divI_7,millicode
1561 comb,<,n x2,0,LREF(neg7)
1562LSYM(7)
1e5f1716 1563 addi 1,x2,x2 /* cannot overflow */
48bd7758
AM
1564 shd 0,x2,29,x1
1565 sh3add x2,x2,x2
1566 addc x1,0,x1
1567LSYM(pos7)
1568 shd x1,x2,26,t1
1569 shd x2,0,26,t2
1570 add x2,t2,x2
1571 addc x1,t1,x1
1572
1573 shd x1,x2,20,t1
1574 shd x2,0,20,t2
1575 add x2,t2,x2
1576 addc x1,t1,t1
1577
1578 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1579
1580 copy 0,x1
1581 shd,= t1,x2,24,t1 /* tentative quotient */
1582LSYM(1)
1583 addb,tr t1,x1,LREF(2) /* add to previous quotient */
1584 extru x2,31,24,x2 /* new remainder (unadjusted) */
1585
1586 MILLIRETN
1587
1588LSYM(2)
1589 addb,tr t1,x2,LREF(1) /* adjust remainder */
1590 extru,= x2,7,8,t1 /* new quotient */
1591
1592LSYM(neg7)
1593 subi 1,x2,x2 /* negate x2 and add 1 */
1594LSYM(8)
1595 shd 0,x2,29,x1
1596 sh3add x2,x2,x2
1597 addc x1,0,x1
1598
1599LSYM(neg7_shift)
1600 shd x1,x2,26,t1
1601 shd x2,0,26,t2
1602 add x2,t2,x2
1603 addc x1,t1,x1
1604
1605 shd x1,x2,20,t1
1606 shd x2,0,20,t2
1607 add x2,t2,x2
1608 addc x1,t1,t1
1609
1610 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1611
1612 copy 0,x1
1613 shd,= t1,x2,24,t1 /* tentative quotient */
1614LSYM(3)
1615 addb,tr t1,x1,LREF(4) /* add to previous quotient */
1616 extru x2,31,24,x2 /* new remainder (unadjusted) */
1617
1618 MILLIRET
1619 sub 0,x1,x1 /* negate result */
1620
1621LSYM(4)
1622 addb,tr t1,x2,LREF(3) /* adjust remainder */
1623 extru,= x2,7,8,t1 /* new quotient */
1624
1625GSYM($$divU_7)
1626 .export $$divU_7,millicode
1627 addi 1,x2,x2 /* can carry */
1628 addc 0,0,x1
1629 shd x1,x2,29,t1
1630 sh3add x2,x2,x2
1631 b LREF(pos7)
1632 addc t1,x1,x1
1633
1634/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
1635GSYM($$divI_9)
1636 .export $$divI_9,millicode
1637 comb,<,n x2,0,LREF(neg9)
1e5f1716 1638 addi 1,x2,x2 /* cannot overflow */
48bd7758
AM
1639 shd 0,x2,29,t1
1640 shd x2,0,29,t2
1641 sub t2,x2,x2
1642 b LREF(pos7)
1643 subb t1,0,x1
1644
1645LSYM(neg9)
1646 subi 1,x2,x2 /* negate and add 1 */
1647 shd 0,x2,29,t1
1648 shd x2,0,29,t2
1649 sub t2,x2,x2
1650 b LREF(neg7_shift)
1651 subb t1,0,x1
1652
1653GSYM($$divU_9)
1654 .export $$divU_9,millicode
1655 addi 1,x2,x2 /* can carry */
1656 addc 0,0,x1
1657 shd x1,x2,29,t1
1658 shd x2,0,29,t2
1659 sub t2,x2,x2
1660 b LREF(pos7)
1661 subb t1,x1,x1
1662
1663/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
1664GSYM($$divI_14)
1665 .export $$divI_14,millicode
1666 comb,<,n x2,0,LREF(neg14)
1667GSYM($$divU_14)
1668 .export $$divU_14,millicode
1669 b LREF(7) /* go to 7 case */
1670 extru x2,30,31,x2 /* divide by 2 */
1671
1672LSYM(neg14)
1673 subi 2,x2,x2 /* negate (and add 2) */
1674 b LREF(8)
1675 extru x2,30,31,x2 /* divide by 2 */
1676 .exit
1677 .procend
1678 .end
1679#endif
1680
1681#ifdef L_mulI
1682/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
1683/******************************************************************************
1684This routine is used on PA2.0 processors when gcc -mno-fpregs is used
1685
1686ROUTINE: $$mulI
1687
1688
1689DESCRIPTION:
1690
1691 $$mulI multiplies two single word integers, giving a single
1692 word result.
1693
1694
1695INPUT REGISTERS:
1696
1697 arg0 = Operand 1
1698 arg1 = Operand 2
1699 r31 == return pc
1700 sr0 == return space when called externally
1701
1702
1703OUTPUT REGISTERS:
1704
1705 arg0 = undefined
1706 arg1 = undefined
1707 ret1 = result
1708
1709OTHER REGISTERS AFFECTED:
1710
1711 r1 = undefined
1712
1713SIDE EFFECTS:
1714
1715 Causes a trap under the following conditions: NONE
1716 Changes memory at the following places: NONE
1717
1718PERMISSIBLE CONTEXT:
1719
1720 Unwindable
1721 Does not create a stack frame
1722 Is usable for internal or external microcode
1723
1724DISCUSSION:
1725
1726 Calls other millicode routines via mrp: NONE
1727 Calls other millicode routines: NONE
1728
1729***************************************************************************/
1730
1731
1732#define a0 %arg0
1733#define a1 %arg1
1734#define t0 %r1
1735#define r %ret1
1736
1737#define a0__128a0 zdep a0,24,25,a0
1738#define a0__256a0 zdep a0,23,24,a0
1739#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
1740#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
1741#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
1742#define b_n_ret_t0 b,n LREF(ret_t0)
1743#define b_e_shift b LREF(e_shift)
1744#define b_e_t0ma0 b LREF(e_t0ma0)
1745#define b_e_t0 b LREF(e_t0)
1746#define b_e_t0a0 b LREF(e_t0a0)
1747#define b_e_t02a0 b LREF(e_t02a0)
1748#define b_e_t04a0 b LREF(e_t04a0)
1749#define b_e_2t0 b LREF(e_2t0)
1750#define b_e_2t0a0 b LREF(e_2t0a0)
1751#define b_e_2t04a0 b LREF(e2t04a0)
1752#define b_e_3t0 b LREF(e_3t0)
1753#define b_e_4t0 b LREF(e_4t0)
1754#define b_e_4t0a0 b LREF(e_4t0a0)
1755#define b_e_4t08a0 b LREF(e4t08a0)
1756#define b_e_5t0 b LREF(e_5t0)
1757#define b_e_8t0 b LREF(e_8t0)
1758#define b_e_8t0a0 b LREF(e_8t0a0)
1759#define r__r_a0 add r,a0,r
1760#define r__r_2a0 sh1add a0,r,r
1761#define r__r_4a0 sh2add a0,r,r
1762#define r__r_8a0 sh3add a0,r,r
1763#define r__r_t0 add r,t0,r
1764#define r__r_2t0 sh1add t0,r,r
1765#define r__r_4t0 sh2add t0,r,r
1766#define r__r_8t0 sh3add t0,r,r
1767#define t0__3a0 sh1add a0,a0,t0
1768#define t0__4a0 sh2add a0,0,t0
1769#define t0__5a0 sh2add a0,a0,t0
1770#define t0__8a0 sh3add a0,0,t0
1771#define t0__9a0 sh3add a0,a0,t0
1772#define t0__16a0 zdep a0,27,28,t0
1773#define t0__32a0 zdep a0,26,27,t0
1774#define t0__64a0 zdep a0,25,26,t0
1775#define t0__128a0 zdep a0,24,25,t0
1776#define t0__t0ma0 sub t0,a0,t0
1777#define t0__t0_a0 add t0,a0,t0
1778#define t0__t0_2a0 sh1add a0,t0,t0
1779#define t0__t0_4a0 sh2add a0,t0,t0
1780#define t0__t0_8a0 sh3add a0,t0,t0
1781#define t0__2t0_a0 sh1add t0,a0,t0
1782#define t0__3t0 sh1add t0,t0,t0
1783#define t0__4t0 sh2add t0,0,t0
1784#define t0__4t0_a0 sh2add t0,a0,t0
1785#define t0__5t0 sh2add t0,t0,t0
1786#define t0__8t0 sh3add t0,0,t0
1787#define t0__8t0_a0 sh3add t0,a0,t0
1788#define t0__9t0 sh3add t0,t0,t0
1789#define t0__16t0 zdep t0,27,28,t0
1790#define t0__32t0 zdep t0,26,27,t0
1791#define t0__256a0 zdep a0,23,24,t0
1792
1793
1794 SUBSPA_MILLI
1795 ATTR_MILLI
1796 .align 16
1797 .proc
1798 .callinfo millicode
12df72e8 1799 .export $$mulI,millicode
48bd7758
AM
1800GSYM($$mulI)
1801 combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
1802 copy 0,r /* zero out the result */
1803 xor a0,a1,a0 /* swap a0 & a1 using the */
1804 xor a0,a1,a1 /* old xor trick */
1805 xor a0,a1,a0
1806LSYM(l4)
1807 combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
1808 zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1809 sub,> 0,a1,t0 /* otherwise negate both and */
1810 combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
1811 sub 0,a0,a1
fe19a83d 1812 movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
48bd7758
AM
1813
1814LSYM(l0) r__r_t0 /* add in this partial product */
1815LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
1816LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1817LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
1818 extru a1,23,24,a1 /* a1 >>= 8 ****************** */
1819
fe19a83d 1820/*16 insts before this. */
48bd7758
AM
1821/* a0 <<= 8 ************************** */
1822LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
1823LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
1824LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
1825LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
1826LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
1827LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
1828LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1829LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
1830LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
1831LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
1832LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1833LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1834LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1835LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1836LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1837LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
1838LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1839LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
1840LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1841LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
1842LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1843LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1844LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1845LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1846LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1847LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1848LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1849LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
1850LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1851LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1852LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1853LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1854LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1855LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1856LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1857LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
1858LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1859LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1860LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1861LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1862LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1863LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1864LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1865LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1866LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1867LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1868LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
1869LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
1870LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
1871LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
1872LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1873LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
1874LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1875LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1876LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1877LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
1878LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1879LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
1880LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1881LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
1882LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1883LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1884LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1885LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1886LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1887LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1888LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1889LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1890LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1891LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1892LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
1893LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
1894LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1895LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
1896LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1897LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1898LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1899LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1900LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
1901LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1902LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
1903LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
1904LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1905LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1906LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1907LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1908LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1909LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
1910LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1911LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1912LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1913LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
1914LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1915LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
1916LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
1917LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1918LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1919LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1920LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
1921LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1922LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1923LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1924LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1925LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
1926LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1927LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1928LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
1929LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
1930LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1931LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1932LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
1933LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1934LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
1935LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
1936LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
1937LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
1938LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
1939LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1940LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
1941LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
1942LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1943LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1944LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1945LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1946LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
1947LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1948LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1949LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1950LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1951LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
1952LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1953LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1954LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1955LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1956LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1957LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
1958LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1959LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1960LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1961LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
1962LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
1963LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
1964LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
1965LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
1966LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
1967LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
1968LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1969LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1970LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1971LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1972LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1973LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1974LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1975LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1976LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1977LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
1978LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1979LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
1980LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
1981LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1982LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
1983LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1984LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
1985LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
1986LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
1987LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1988LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
1989LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1990LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1991LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
1992LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
1993LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
1994LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
1995LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
1996LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
1997LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
1998LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
1999LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
2000LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
2001LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
2002LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
2003LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
2004LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
2005LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
2006LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
2007LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
2008LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
2009LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
2010LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
2011LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
2012LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
2013LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
2014LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2015LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2016LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2017LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2018LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
2019LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
2020LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
2021LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2022LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
2023LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
2024LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
2025LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
2026LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2027LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
2028LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
2029LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2030LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
2031LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
2032LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
2033LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
2034LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
2035LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
2036LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
2037LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
2038LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2039LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2040LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2041LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2042LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
2043LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
2044LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
2045LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2046LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
2047LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
2048LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
2049LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
2050LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2051LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
2052LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
2053LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2054LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
2055LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
2056LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
2057LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
2058LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
2059LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
2060LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
2061LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
2062LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
2063LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
2064LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
2065LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
2066LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
2067LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
2068LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
2069LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
2070LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
2071LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
2072LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
2073LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
2074LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
2075LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
2076LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
2077LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
fe19a83d 2078/*1040 insts before this. */
48bd7758
AM
2079LSYM(ret_t0) MILLIRET
2080LSYM(e_t0) r__r_t0
2081LSYM(e_shift) a1_ne_0_b_l2
2082 a0__256a0 /* a0 <<= 8 *********** */
2083 MILLIRETN
2084LSYM(e_t0ma0) a1_ne_0_b_l0
2085 t0__t0ma0
2086 MILLIRET
2087 r__r_t0
2088LSYM(e_t0a0) a1_ne_0_b_l0
2089 t0__t0_a0
2090 MILLIRET
2091 r__r_t0
2092LSYM(e_t02a0) a1_ne_0_b_l0
2093 t0__t0_2a0
2094 MILLIRET
2095 r__r_t0
2096LSYM(e_t04a0) a1_ne_0_b_l0
2097 t0__t0_4a0
2098 MILLIRET
2099 r__r_t0
2100LSYM(e_2t0) a1_ne_0_b_l1
2101 r__r_2t0
2102 MILLIRETN
2103LSYM(e_2t0a0) a1_ne_0_b_l0
2104 t0__2t0_a0
2105 MILLIRET
2106 r__r_t0
2107LSYM(e2t04a0) t0__t0_2a0
2108 a1_ne_0_b_l1
2109 r__r_2t0
2110 MILLIRETN
2111LSYM(e_3t0) a1_ne_0_b_l0
2112 t0__3t0
2113 MILLIRET
2114 r__r_t0
2115LSYM(e_4t0) a1_ne_0_b_l1
2116 r__r_4t0
2117 MILLIRETN
2118LSYM(e_4t0a0) a1_ne_0_b_l0
2119 t0__4t0_a0
2120 MILLIRET
2121 r__r_t0
2122LSYM(e4t08a0) t0__t0_2a0
2123 a1_ne_0_b_l1
2124 r__r_4t0
2125 MILLIRETN
2126LSYM(e_5t0) a1_ne_0_b_l0
2127 t0__5t0
2128 MILLIRET
2129 r__r_t0
2130LSYM(e_8t0) a1_ne_0_b_l1
2131 r__r_8t0
2132 MILLIRETN
2133LSYM(e_8t0a0) a1_ne_0_b_l0
2134 t0__8t0_a0
2135 MILLIRET
2136 r__r_t0
2137
2138 .procend
2139 .end
2140#endif