From: nickc Date: Sat, 6 Jan 2001 00:46:33 +0000 (+0000) Subject: Faster __mulsi routine X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=82e0de5a9ba5204ac2b01bc526a3e650fd3ad35d;p=thirdparty%2Fgcc.git Faster __mulsi routine git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@38736 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 84f8a29522b0..3b02eb8db5aa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +Fri Jan 5 16:34:18 2001 Nick Clifton + + * config/v850/lib1funcs.asm: Replace __mulsi3 routine with faster + version supplied by Matteo Frigo. + 2001-01-05 Neil Booth * cpp.texi: Update for -MQ. diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm index a162421e812f..9248acb117b9 100644 --- a/gcc/config/v850/lib1funcs.asm +++ b/gcc/config/v850/lib1funcs.asm @@ -33,57 +33,60 @@ Boston, MA 02111-1307, USA. */ .type ___mulsi3,@function /* - * In order to not deal with negative numbers (mulh is a signed multiply - * and we want an unsigned multiply, code the multiplication as a series - * of 7 bit multiplies). 
+ * #define SHIFT 12
+ * #define MASK ((1 << SHIFT) - 1)
+ *
+ * #define STEP(i, j) \
+ * ({ \
+ * short a_part = (a >> (i)) & MASK; \
+ * short b_part = (b >> (j)) & MASK; \
+ * int res = (((int)a_part) * ((int)b_part)); \
+ * res; \
+ * })
 *
- * int __mulsi3 (unsigned a, unsigned b)
+ * int
+ * __mulsi3 (unsigned a, unsigned b)
 * {
- * int i, j;
- * int ret = 0;
- *
- * for (i = 0; i < 32; i += 7)
- * {
- * short a_part = a & 0x7f;
- * unsigned b_tmp = b;
- * a >>= 7;
- *
- * for (j = 0; (i+j) < 32; j += 7)
- * {
- * short b_part = b_tmp & 0x7f;
- * ret += (((int)a_part) * ((int)b_part)) << (i+j);
- * b_tmp >>= 7;
- * }
- * }
- *
- * return ret;
+ * return STEP (0, 0) +
+ * ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
+ * ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
+ * << (2 * SHIFT));
 * }
 */
 ___mulsi3:
- mov 0,r10 /* total */
- mov 0,r14 /* i = 0, index for multiply a's part */
- movea lo(31),r0,r16 /* upper bounds for loop */
-.L5:
- mov r7,r13 /* b_tmp = b */
- andi 0x7f,r6,r15 /* a_part = (a & 127) */
- shr 7,r6 /* a >>= 7 */
- mov r14,r12 /* i+j = i */
-.L9:
- andi 0x7f,r13,r11 /* b_part = (b_tmp & 127) */
- mulh r15,r11 /* ((int)a_part) * ((int)b_part) */
- shr 7,r13 /* b_tmp >>= 7 */
- shl r12,r11 /* (((int)a_part) * ((int)b_part)) << (i+j) */
- add r11,r10 /* ret += (((int)a_part) * ((int)b_part)) << (i+j) */
- add 7,r12 /* i+j += 7 */
- cmp r16,r12 /* i+j < 32 */
- ble .L9
-
- add 7,r14 /* i += 7 */
- cmp r16,r14 /* i < 32 */
- ble .L5
-
- jmp [r31] /* return */
+ mov r6,r13 /* r13 = a; below, aN/bN mean (x >> (N * SHIFT)) & MASK */
+ movea lo(4095),r0,r16 /* r16 = MASK */
+ and r16,r13 /* r13 = a0 */
+ mov r7,r15 /* r15 = b */
+ and r16,r15 /* r15 = b0 */
+ mov r13,r10 /* copy a0: mulh overwrites its second operand */
+ mulh r15,r10 /* r10 = a0 * b0 = STEP (0, 0); starts the result */
+ shr 12,r6 /* r6 = a >> SHIFT */
+ mov r6,r14 /* keep r6 unmasked, it is shifted again later */
+ and r16,r14 /* r14 = a1 */
+ mov r14,r11 /* copy a1, it is needed again for a1 * b1 */
+ mulh r15,r11 /* r11 = a1 * b0 = STEP (SHIFT, 0) */
+ shr 12,r7 /* r7 = b >> SHIFT */
+ mov r7,r12 /* keep r7 unmasked, it is shifted again next */
+ and r16,r12 /* r12 = b1 */
+ shr 12,r7 /* r7 = b >> (2 * SHIFT) */
+ and r16,r7 /* r7 = b2 */
+ mulh r13,r7 /* r7 = a0 * b2 = STEP (0, 2 * SHIFT) */
+ shr 12,r6 /* r6 = a >> (2 * SHIFT) */
+ mulh r12,r13 /* r13 = a0 * b1 = STEP (0, SHIFT); last use of a0 */
+ and r16,r6 /* r6 = a2 */
+ add r13,r11 /* r11 = STEP (SHIFT, 0) + STEP (0, SHIFT) */
+ shl 12,r11 /* ... << SHIFT */
+ add r11,r10 /* result += middle terms */
+ mov r14,r11 /* r11 = a1 (r11 is free again) */
+ mulh r12,r11 /* r11 = a1 * b1 = STEP (SHIFT, SHIFT) */
+ mulh r15,r6 /* r6 = a2 * b0 = STEP (2 * SHIFT, 0) */
+ add r11,r7 /* r7 = STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) */
+ add r6,r7 /* ... + STEP (2 * SHIFT, 0) */
+ shl 24,r7 /* ... << (2 * SHIFT) */
+ add r7,r10 /* r10 = final product */
+ jmp [r31] /* return, result in r10 */
 .size ___mulsi3,.-___mulsi3
 #endif