From: Cerion Armour-Brown Date: Fri, 18 Nov 2005 20:45:18 +0000 (+0000) Subject: Changed altivec floating point setup to Java/IEEE mode X-Git-Tag: svn/VALGRIND_3_1_0~36 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0f776af6d24161e68d142be10a9890575e02d1f5;p=thirdparty%2Fvalgrind.git Changed altivec floating point setup to Java/IEEE mode - Non-Java mode is the system default, but was causing some accuracy problems by rounding off intermediate denormalised results to zero. We now have some small errors (lowest bit only) due to using greater accuracy than the system default, but this is better overall. Also expanded dispatcher check of FPSCR to include all control bits git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5196 --- diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S index 30968c9943..dbb7c785a8 100644 --- a/coregrind/m_dispatch/dispatch-ppc32-linux.S +++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S @@ -164,12 +164,10 @@ LafterVMX1: /* 24(sp) used later to stop ctr reg being clobbered */ + /* 8:20(sp) free */ + /* Linkage Area (reserved) - 20(sp) : TOC save area - 16(sp) : link editor word - 12(sp) : compiler word - 8(sp) : LR - 4(sp) : CR + 4(sp) : LR 0(sp) : back-chain */ @@ -191,6 +189,7 @@ LafterVMX1: lwz 3,VG_(machine_ppc32_has_FP)@l(3) cmplwi 3,0 beq LafterFP2 + fsub 3,3,3 /* generate zero */ mtfsf 0xFF,3 LafterFP2: @@ -201,10 +200,8 @@ LafterFP2: lwz 3,VG_(machine_ppc32_has_VMX)@l(3) cmplwi 3,0 beq LafterVMX2 - /* generate vector {0x0,0x0,0x0,0x00010000} */ - vspltisw 3,0x1 /* 4x 0x00000001 */ - vspltisw 4,0x0 /* generate zero */ - vsldoi 3,4,3,0x6 /* v3 = v3 >> 10 bytes */ + + vspltisw 3,0x0 /* generate zero */ mtvscr 3 LafterVMX2: @@ -298,12 +295,12 @@ run_innerloop_exit: cmplwi 10,0 beq LafterFP8 - /* Check FPSCR[RM] == 0 */ + /* Check FPSCR & 0xFF == 0 (lowest 8bits are controls) */ mffs 4 /* fpscr -> fpr */ li 5,48 stfiwx 4,5,1 /* fpr to stack */ lwzx 6,5,1 /* load to gpr */ andi. 
6,6,0x3 /* mask wanted bits */ + andi. 6,6,0xFF /* mask wanted bits */ cmplwi 6,0x0 /* cmp with zero */ bne invariant_violation /* branch if not zero */ LafterFP8: @@ -318,13 +315,13 @@ LafterFP8: /* first generate 4x 0x00010000 */ vspltisw 4,0x1 /* 4x 0x00000001 */ vspltisw 5,0x0 /* zero */ - vsldoi 6,4,5,0x2 /* << 2bytes => 4x 0x00010000 */ + vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */ /* retrieve VSCR and mask wanted bits */ mfvscr 7 - vand 7,7,6 /* gives SAT flag */ + vand 7,7,6 /* gives NJ flag */ vspltw 7,7,0x3 /* flags-word to all lanes */ - vcmpequw. 8,6,7 /* CR[24] = 1 if equal */ - bt 26,invariant_violation /* branch if bit 26 of CR is true */ + vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */ + bt 24,invariant_violation /* branch if all_equal */ LafterVMX8: /* otherwise we're OK */ diff --git a/none/tests/ppc32/jm-insns.c b/none/tests/ppc32/jm-insns.c index 178a9149c2..9fa274b682 100644 --- a/none/tests/ppc32/jm-insns.c +++ b/none/tests/ppc32/jm-insns.c @@ -5425,7 +5425,8 @@ static test_loop_t float_loops[] = { __asm__ __volatile__ ("vsubsbs 31,%0,%1" : : "vr" (v1), "vr" (v2)); // sets VSCR[SAT] */ -#define DEFAULT_VSCR 0x00010000 +//#define DEFAULT_VSCR 0x00010000 +#define DEFAULT_VSCR 0x0 static void test_av_int_one_arg (const char* name, test_func_t func, unused uint32_t test_flags)