git.ipfire.org Git - thirdparty/gcc.git/commitdiff
(ashiftrt_r4_*): Rewrite for efficiency.
author Jim Wilson <wilson@gcc.gnu.org>
Fri, 28 Apr 1995 01:08:43 +0000 (18:08 -0700)
committer Jim Wilson <wilson@gcc.gnu.org>
Fri, 28 Apr 1995 01:08:43 +0000 (18:08 -0700)
(ashrsi3, ashlsi3, lshrsi3): Add.

From-SVN: r9521

gcc/config/sh/lib1funcs.asm

index 1472d82b11698210f78a436ffcd892a1d8e18c6b..be24f76a8f4509490ab3c95feb19c48897597e9e 100644 (file)
@@ -34,6 +34,9 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
 !! Contributed by Steve Chamberlain.
 !! sac@cygnus.com
 
+!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
+!! recoded in assembly by Toshiyasu Morita
+!! tm@netcom.com
 
 #ifdef L_ashiftrt
        .global ___ashiftrt_r4_0
@@ -70,10 +73,13 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
        .global ___ashiftrt_r4_31
        .global ___ashiftrt_r4_32
 
+       .align  1
 ___ashiftrt_r4_32:
-       shar    r4
 ___ashiftrt_r4_31:
-       shar    r4
+       rotcl   r4
+       rts
+       subc    r4,r4
+
 ___ashiftrt_r4_30:
        shar    r4
 ___ashiftrt_r4_29:
@@ -87,7 +93,11 @@ ___ashiftrt_r4_26:
 ___ashiftrt_r4_25:
        shar    r4
 ___ashiftrt_r4_24:
-       shar    r4
+       shlr16  r4
+       shlr8   r4
+       rts
+       exts.b  r4,r4
+
 ___ashiftrt_r4_23:
        shar    r4
 ___ashiftrt_r4_22:
@@ -103,7 +113,10 @@ ___ashiftrt_r4_18:
 ___ashiftrt_r4_17:
        shar    r4
 ___ashiftrt_r4_16:
-       shar    r4
+       shlr16  r4
+       rts
+       exts.w  r4,r4
+
 ___ashiftrt_r4_15:
        shar    r4
 ___ashiftrt_r4_14:
@@ -141,6 +154,471 @@ ___ashiftrt_r4_0:
        nop
 #endif
 
+#ifdef L_ashiftrt_n
+
+!
+! ___ashrsi3 - arithmetic (sign-propagating) right shift of a 32-bit value
+!
+! Entry:
+!   r4: Value to shift
+!   r5: Shift count (compared unsigned; any count above 31 acts like 31)
+!
+! Exit:
+!   r0: Result
+!
+! Destroys:
+!   r1, T bit
+!
+! The count indexes a table of 16-bit offsets into a fall-through ladder.
+!
+       .global ___ashrsi3
+       .align  2
+___ashrsi3:
+       mov     #31,r0
+       cmp/hi  r0,r5                   ! T = (r5 > 31), unsigned
+       bt      L_ashrsi3_32            ! no delay slot: r0 is still #31 there
+       mov     r5,r1                   ! Remove later
+       mova    L_ashrsi3_table,r0      ! r0 = table address (4-byte aligned)
+       shll    r1                      ! Remove later (r1 = count * 2)
+       mov.w   @(r0,r1),r1             ! Change to mov.b @(r0,r5),r1
+       add     r1,r0                   ! r0 = address of the ladder entry
+       jmp     @r0
+       mov     r4,r0                   ! delay slot: r0 = value to shift
+
+L_ashrsi3_table:
+       .word           L_ashrsi3_0-L_ashrsi3_table     ! Change to .bytes
+       .word           L_ashrsi3_1-L_ashrsi3_table
+       .word           L_ashrsi3_2-L_ashrsi3_table
+       .word           L_ashrsi3_3-L_ashrsi3_table
+       .word           L_ashrsi3_4-L_ashrsi3_table
+       .word           L_ashrsi3_5-L_ashrsi3_table
+       .word           L_ashrsi3_6-L_ashrsi3_table
+       .word           L_ashrsi3_7-L_ashrsi3_table
+       .word           L_ashrsi3_8-L_ashrsi3_table
+       .word           L_ashrsi3_9-L_ashrsi3_table
+       .word           L_ashrsi3_10-L_ashrsi3_table
+       .word           L_ashrsi3_11-L_ashrsi3_table
+       .word           L_ashrsi3_12-L_ashrsi3_table
+       .word           L_ashrsi3_13-L_ashrsi3_table
+       .word           L_ashrsi3_14-L_ashrsi3_table
+       .word           L_ashrsi3_15-L_ashrsi3_table
+       .word           L_ashrsi3_16-L_ashrsi3_table
+       .word           L_ashrsi3_17-L_ashrsi3_table
+       .word           L_ashrsi3_18-L_ashrsi3_table
+       .word           L_ashrsi3_19-L_ashrsi3_table
+       .word           L_ashrsi3_20-L_ashrsi3_table
+       .word           L_ashrsi3_21-L_ashrsi3_table
+       .word           L_ashrsi3_22-L_ashrsi3_table
+       .word           L_ashrsi3_23-L_ashrsi3_table
+       .word           L_ashrsi3_24-L_ashrsi3_table
+       .word           L_ashrsi3_25-L_ashrsi3_table
+       .word           L_ashrsi3_26-L_ashrsi3_table
+       .word           L_ashrsi3_27-L_ashrsi3_table
+       .word           L_ashrsi3_28-L_ashrsi3_table
+       .word           L_ashrsi3_29-L_ashrsi3_table
+       .word           L_ashrsi3_30-L_ashrsi3_table
+       .word           L_ashrsi3_31-L_ashrsi3_table
+
+L_ashrsi3_32:                          ! count > 31: the jmp delay slot never
+       mov     r4,r0                   ! ran, so load the value here first
+L_ashrsi3_31:
+       rotcl   r0                      ! T = sign bit of the value
+       rts
+       subc    r0,r0                   ! delay slot: r0 = 0 - T (0 or -1)
+
+L_ashrsi3_30:
+       shar    r0
+L_ashrsi3_29:
+       shar    r0
+L_ashrsi3_28:
+       shar    r0
+L_ashrsi3_27:
+       shar    r0
+L_ashrsi3_26:
+       shar    r0
+L_ashrsi3_25:
+       shar    r0
+L_ashrsi3_24:
+       shlr16  r0                      ! logical shift by 16 + 8 = 24 ...
+       shlr8   r0
+       rts
+       exts.b  r0,r0                   ! ... then sign-extend the low byte
+
+L_ashrsi3_23:
+       shar    r0
+L_ashrsi3_22:
+       shar    r0
+L_ashrsi3_21:
+       shar    r0
+L_ashrsi3_20:
+       shar    r0
+L_ashrsi3_19:
+       shar    r0
+L_ashrsi3_18:
+       shar    r0
+L_ashrsi3_17:
+       shar    r0
+L_ashrsi3_16:
+       shlr16  r0                      ! logical shift by 16 ...
+       rts
+       exts.w  r0,r0                   ! ... then sign-extend the low word
+
+L_ashrsi3_15:
+       shar    r0
+L_ashrsi3_14:
+       shar    r0
+L_ashrsi3_13:
+       shar    r0
+L_ashrsi3_12:
+       shar    r0
+L_ashrsi3_11:
+       shar    r0
+L_ashrsi3_10:
+       shar    r0
+L_ashrsi3_9:
+       shar    r0
+L_ashrsi3_8:
+       shar    r0
+L_ashrsi3_7:
+       shar    r0
+L_ashrsi3_6:
+       shar    r0
+L_ashrsi3_5:
+       shar    r0
+L_ashrsi3_4:
+       shar    r0
+L_ashrsi3_3:
+       shar    r0
+L_ashrsi3_2:
+       shar    r0
+L_ashrsi3_1:
+       rts
+       shar    r0                      ! delay slot: last single-bit shift
+
+L_ashrsi3_0:
+       rts                             ! count 0: r0 already holds the value
+       nop
+
+#endif
+
+#ifdef L_ashiftlt
+
+!
+! ___ashlsi3 - left shift of a 32-bit value by a variable count
+!
+! Entry:
+!   r4: Value to shift
+!   r5: Shift count (compared unsigned; any count above 31 yields 0)
+!
+! Exit:
+!   r0: Result
+!
+! Destroys:
+!   r1, T bit
+!
+! The count indexes a table of 16-bit offsets; each target label N (1..31)
+! runs shll/shll2/shll8/shll16 steps totalling N bits, ending at an rts.
+!
+       .global ___ashlsi3
+       .align  2
+___ashlsi3:
+       mov     #31,r0
+       cmp/hi  r0,r5                   ! T = (r5 > 31), unsigned
+       bt      L_ashlsi3_32            ! out of range: return 0
+       mov     r5,r1                   ! Remove later (r1 = count)
+       mova    L_ashlsi3_table,r0      ! r0 = table address
+       shll    r1                      ! Remove later (r1 = count * 2)
+       mov.w   @(r0,r1),r1             ! Change to mov.b @(r0,r5),r1
+       add     r1,r0                   ! r0 = address of the ladder entry
+       jmp     @r0
+       mov     r4,r0                   ! delay slot: r0 = value to shift
+
+L_ashlsi3_table:
+       .word           L_ashlsi3_0-L_ashlsi3_table     ! Change to .bytes
+       .word           L_ashlsi3_1-L_ashlsi3_table
+       .word           L_ashlsi3_2-L_ashlsi3_table
+       .word           L_ashlsi3_3-L_ashlsi3_table
+       .word           L_ashlsi3_4-L_ashlsi3_table
+       .word           L_ashlsi3_5-L_ashlsi3_table
+       .word           L_ashlsi3_6-L_ashlsi3_table
+       .word           L_ashlsi3_7-L_ashlsi3_table
+       .word           L_ashlsi3_8-L_ashlsi3_table
+       .word           L_ashlsi3_9-L_ashlsi3_table
+       .word           L_ashlsi3_10-L_ashlsi3_table
+       .word           L_ashlsi3_11-L_ashlsi3_table
+       .word           L_ashlsi3_12-L_ashlsi3_table
+       .word           L_ashlsi3_13-L_ashlsi3_table
+       .word           L_ashlsi3_14-L_ashlsi3_table
+       .word           L_ashlsi3_15-L_ashlsi3_table
+       .word           L_ashlsi3_16-L_ashlsi3_table
+       .word           L_ashlsi3_17-L_ashlsi3_table
+       .word           L_ashlsi3_18-L_ashlsi3_table
+       .word           L_ashlsi3_19-L_ashlsi3_table
+       .word           L_ashlsi3_20-L_ashlsi3_table
+       .word           L_ashlsi3_21-L_ashlsi3_table
+       .word           L_ashlsi3_22-L_ashlsi3_table
+       .word           L_ashlsi3_23-L_ashlsi3_table
+       .word           L_ashlsi3_24-L_ashlsi3_table
+       .word           L_ashlsi3_25-L_ashlsi3_table
+       .word           L_ashlsi3_26-L_ashlsi3_table
+       .word           L_ashlsi3_27-L_ashlsi3_table
+       .word           L_ashlsi3_28-L_ashlsi3_table
+       .word           L_ashlsi3_29-L_ashlsi3_table
+       .word           L_ashlsi3_30-L_ashlsi3_table
+       .word           L_ashlsi3_31-L_ashlsi3_table
+
+L_ashlsi3_6:
+       shll2   r0
+L_ashlsi3_4:
+       shll2   r0
+L_ashlsi3_2:
+       rts
+       shll2   r0                      ! delay slot: final 2 bits
+
+L_ashlsi3_7:
+       shll2   r0
+L_ashlsi3_5:
+       shll2   r0
+L_ashlsi3_3:
+       shll2   r0
+L_ashlsi3_1:
+       rts
+       shll    r0                      ! delay slot: final 1 bit
+
+L_ashlsi3_14:
+       shll2   r0
+L_ashlsi3_12:
+       shll2   r0
+L_ashlsi3_10:
+       shll2   r0
+L_ashlsi3_8:
+       rts
+       shll8   r0                      ! delay slot: final 8 bits
+
+L_ashlsi3_15:
+       shll2   r0
+L_ashlsi3_13:
+       shll2   r0
+L_ashlsi3_11:
+       shll2   r0
+L_ashlsi3_9:
+       shll8   r0                      ! 8 + 1 = 9
+       rts
+       shll    r0
+
+L_ashlsi3_22:
+       shll2   r0
+L_ashlsi3_20:
+       shll2   r0
+L_ashlsi3_18:
+       shll2   r0
+L_ashlsi3_16:
+       rts
+       shll16  r0                      ! delay slot: final 16 bits
+
+L_ashlsi3_23:
+       shll2   r0
+L_ashlsi3_21:
+       shll2   r0
+L_ashlsi3_19:
+       shll2   r0
+L_ashlsi3_17:
+       shll16  r0                      ! 16 + 1 = 17
+       rts
+       shll    r0
+
+L_ashlsi3_30:
+       shll2   r0
+L_ashlsi3_28:
+       shll2   r0
+L_ashlsi3_26:
+       shll2   r0
+L_ashlsi3_24:
+       shll16  r0                      ! 16 + 8 = 24
+       rts
+       shll8   r0
+
+L_ashlsi3_31:
+       shll2   r0
+L_ashlsi3_29:
+       shll2   r0
+L_ashlsi3_27:
+       shll2   r0
+L_ashlsi3_25:
+       shll16  r0                      ! 16 + 8 + 1 = 25
+       shll8   r0
+       rts
+       shll    r0
+
+L_ashlsi3_32:
+       rts                             ! count > 31: every bit is shifted out
+       mov     #0,r0
+
+L_ashlsi3_0:
+       rts                             ! count 0: r0 already holds the value
+       nop
+
+#endif
+
+#ifdef L_lshiftrt
+
+!
+! ___lshrsi3 - logical (zero-filling) right shift of a 32-bit value
+!
+! Entry:
+!   r4: Value to shift
+!   r5: Shift count (compared unsigned; any count above 31 yields 0)
+!
+! Exit:
+!   r0: Result
+!
+! Destroys:
+!   r1, T bit
+!
+! The count indexes a table of 16-bit offsets; each target label N (1..31)
+! runs shlr/shlr2/shlr8/shlr16 steps totalling N bits, ending at an rts.
+!
+       .global ___lshrsi3
+       .align  2
+___lshrsi3:
+       mov     #31,r0
+       cmp/hi  r0,r5                   ! T = (r5 > 31), unsigned
+       bt      L_lshrsi3_32            ! out of range: return 0
+       mov     r5,r1                   ! Remove later (r1 = count)
+       mova    L_lshrsi3_table,r0      ! r0 = table address
+       shll    r1                      ! Remove later (r1 = count * 2)
+       mov.w   @(r0,r1),r1             ! Change to mov.b @(r0,r5),r1
+       add     r1,r0                   ! r0 = address of the ladder entry
+       jmp     @r0
+       mov     r4,r0                   ! delay slot: r0 = value to shift
+
+L_lshrsi3_table:
+       .word           L_lshrsi3_0-L_lshrsi3_table     ! Change to .bytes
+       .word           L_lshrsi3_1-L_lshrsi3_table
+       .word           L_lshrsi3_2-L_lshrsi3_table
+       .word           L_lshrsi3_3-L_lshrsi3_table
+       .word           L_lshrsi3_4-L_lshrsi3_table
+       .word           L_lshrsi3_5-L_lshrsi3_table
+       .word           L_lshrsi3_6-L_lshrsi3_table
+       .word           L_lshrsi3_7-L_lshrsi3_table
+       .word           L_lshrsi3_8-L_lshrsi3_table
+       .word           L_lshrsi3_9-L_lshrsi3_table
+       .word           L_lshrsi3_10-L_lshrsi3_table
+       .word           L_lshrsi3_11-L_lshrsi3_table
+       .word           L_lshrsi3_12-L_lshrsi3_table
+       .word           L_lshrsi3_13-L_lshrsi3_table
+       .word           L_lshrsi3_14-L_lshrsi3_table
+       .word           L_lshrsi3_15-L_lshrsi3_table
+       .word           L_lshrsi3_16-L_lshrsi3_table
+       .word           L_lshrsi3_17-L_lshrsi3_table
+       .word           L_lshrsi3_18-L_lshrsi3_table
+       .word           L_lshrsi3_19-L_lshrsi3_table
+       .word           L_lshrsi3_20-L_lshrsi3_table
+       .word           L_lshrsi3_21-L_lshrsi3_table
+       .word           L_lshrsi3_22-L_lshrsi3_table
+       .word           L_lshrsi3_23-L_lshrsi3_table
+       .word           L_lshrsi3_24-L_lshrsi3_table
+       .word           L_lshrsi3_25-L_lshrsi3_table
+       .word           L_lshrsi3_26-L_lshrsi3_table
+       .word           L_lshrsi3_27-L_lshrsi3_table
+       .word           L_lshrsi3_28-L_lshrsi3_table
+       .word           L_lshrsi3_29-L_lshrsi3_table
+       .word           L_lshrsi3_30-L_lshrsi3_table
+       .word           L_lshrsi3_31-L_lshrsi3_table
+
+L_lshrsi3_6:
+       shlr2   r0
+L_lshrsi3_4:
+       shlr2   r0
+L_lshrsi3_2:
+       rts
+       shlr2   r0                      ! delay slot: final 2 bits
+
+L_lshrsi3_7:
+       shlr2   r0
+L_lshrsi3_5:
+       shlr2   r0
+L_lshrsi3_3:
+       shlr2   r0
+L_lshrsi3_1:
+       rts
+       shlr    r0                      ! delay slot: final 1 bit
+
+L_lshrsi3_14:
+       shlr2   r0
+L_lshrsi3_12:
+       shlr2   r0
+L_lshrsi3_10:
+       shlr2   r0
+L_lshrsi3_8:
+       rts
+       shlr8   r0                      ! delay slot: final 8 bits
+
+L_lshrsi3_15:
+       shlr2   r0
+L_lshrsi3_13:
+       shlr2   r0
+L_lshrsi3_11:
+       shlr2   r0
+L_lshrsi3_9:
+       shlr8   r0                      ! 8 + 1 = 9
+       rts
+       shlr    r0
+
+L_lshrsi3_22:
+       shlr2   r0
+L_lshrsi3_20:
+       shlr2   r0
+L_lshrsi3_18:
+       shlr2   r0
+L_lshrsi3_16:
+       rts
+       shlr16  r0                      ! delay slot: final 16 bits
+
+L_lshrsi3_23:
+       shlr2   r0
+L_lshrsi3_21:
+       shlr2   r0
+L_lshrsi3_19:
+       shlr2   r0
+L_lshrsi3_17:
+       shlr16  r0                      ! 16 + 1 = 17
+       rts
+       shlr    r0
+
+L_lshrsi3_30:
+       shlr2   r0
+L_lshrsi3_28:
+       shlr2   r0
+L_lshrsi3_26:
+       shlr2   r0
+L_lshrsi3_24:
+       shlr16  r0                      ! 16 + 8 = 24
+       rts
+       shlr8   r0
+
+L_lshrsi3_31:
+       shlr2   r0
+L_lshrsi3_29:
+       shlr2   r0
+L_lshrsi3_27:
+       shlr2   r0
+L_lshrsi3_25:
+       shlr16  r0                      ! 16 + 8 + 1 = 25
+       shlr8   r0
+       rts
+       shlr    r0
+
+L_lshrsi3_32:
+       rts                             ! count > 31: every bit is shifted out
+       mov     #0,r0
+
+L_lshrsi3_0:
+       rts                             ! count 0: r0 already holds the value
+       nop
+
+#endif
+
 #ifdef L_movstr
        .text
 ! done all the large groups, do the remainder
@@ -152,7 +630,7 @@ done:
        shll2   r6
        add     r6,r0
        jmp     @r0
-       add     #64,r4  
+       add     #64,r4
        .align  4
        .global ___movstrSI64
 ___movstrSI64:
@@ -305,7 +783,7 @@ ___mulsi3:
        swap.w  r4,r2           ! r2 = bbaa
        xtrct   r2,r3           ! r3 = aacc
        tst     r3,r3           ! msws zero ?
-       bf      hiset           
+       bf      hiset
        rts                     ! yes - then weve got the answer
        sts     macl,r0
 
@@ -313,14 +791,14 @@ hiset:    sts     macl,r0         ! r0 = bb*dd
        mulu    r2,r5           | brewing macl = aa*dd
        sts     macl,r1
        mulu    r3,r4           | brewing macl = cc*bb
-       sts     macl,r2         
+       sts     macl,r2
        add     r1,r2
        shll16  r2
        rts
        add     r2,r0
-       
-       
-#endif 
+
+
+#endif
 #ifdef L_sdivsi3
        .title "SH DIVIDE"
 !! 4 byte integer Divide code for the Hitachi SH
@@ -336,7 +814,7 @@ hiset:      sts     macl,r0         ! r0 = bb*dd
 ___sdivsi3:
        mov     r4,r1
        mov     r5,r0
-       
+
        tst     r0,r0
        bt      div0
        mov     #0,r2
@@ -410,10 +888,10 @@ ___sdivsi3:
        div1    r0,r3
        rotcl   r1
        addc    r2,r1
-       rts     
+       rts
        mov     r1,r0
 
-       
+
 div0:  rts
        mov     #0,r0
 
@@ -426,7 +904,7 @@ div0:       rts
 !! Steve Chamberlain
 !! sac@cygnus.com
 !!
-!! 
+!!
 
 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
        .global ___udivsi3
@@ -434,12 +912,12 @@ div0:     rts
 ___udivsi3:
 longway:
        mov     #0,r0
-       div0u           
-       ! get one bit from the msb of the numerator into the T 
+       div0u
+       ! get one bit from the msb of the numerator into the T
        ! bit and divide it by whats in r5.  Put the answer bit
        ! into the T bit so it can come out again at the bottom
 
-       rotcl   r4 ; div1 r5,r0 
+       rotcl   r4 ; div1 r5,r0
        rotcl   r4 ; div1 r5,r0
        rotcl   r4 ; div1 r5,r0
        rotcl   r4 ; div1 r5,r0
@@ -475,9 +953,8 @@ vshortway:
        rotcl   r4 ; div1 r5,r0
        rotcl   r4 ; div1 r5,r0
        rotcl   r4 ; div1 r5,r0
-       rotcl   r4      
+       rotcl   r4
 ret:   rts
        mov     r4,r0
 
 #endif
-