From: Carl Love Date: Fri, 26 Feb 2021 22:05:12 +0000 (-0600) Subject: PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations X-Git-Tag: VALGRIND_3_18_0~143 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e09fdaf569b975717465ed8043820d0198d4d47d;p=thirdparty%2Fvalgrind.git PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations Add support for: pmxvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update), Prefixed Masked pmxvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive multiply, Positive accumulate), Prefixed Masked pmxvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturation (Positive multiply, Positive accumulate), Prefixed Masked xvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update) xvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive multiply, Positive accumulate) xvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturation (Positive multiply, Positive accumulate) --- diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c index 6bcee966d2..d8131eb607 100644 --- a/VEX/priv/guest_ppc_helpers.c +++ b/VEX/priv/guest_ppc_helpers.c @@ -1446,16 +1446,16 @@ static UInt exts4( UInt src) return src & 0xF; /* make sure high order bits are zero */ } -static UInt exts8( UInt src) +static ULong exts8( UInt src) { - /* Input is an 8-bit value. Extend bit 7 to bits [31:8] */ + /* Input is an 8-bit value. Extend bit 7 to bits [63:8] */ if (( src >> 7 ) & 0x1) - return src | 0xFFFFFF00; /* sign bit is a 1, extend */ + return src | 0xFFFFFFFFFFFFFF00ULL; /* sign bit is a 1, extend */ else return src & 0xFF; /* make sure high order bits are zero */ } -static UInt extz8( UInt src) +static ULong extz8( UInt src) { /* Input is an 8-bit value. Extend src on the left with zeros. */ return src & 0xFF; /* make sure high order bits are zero */ @@ -1662,12 +1662,12 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, ULong srcB_hi, ULong srcB_lo, UInt masks_inst ) { - UInt i, j, mask, sum, inst, acc_entry, prefix_inst; + UInt i, j, mask, inst, acc_entry, prefix_inst; UInt srcA_bytes[4][4]; /* word, byte */ UInt srcB_bytes[4][4]; /* word, byte */ UInt acc_word[4]; - UInt prod0, prod1, prod2, prod3; + ULong prod0, prod1, prod2, prod3, sum; UInt result[4]; UInt pmsk = 0; UInt xmsk = 0; @@ -1742,10 +1742,13 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, sum = prod0 + prod1 + prod2 + prod3; if ( inst == XVI8GER4 ) - result[j] = sum; + result[j] = chop64to32( sum ); else if ( inst == XVI8GER4PP ) - result[j] = sum + acc_word[j]; + result[j] = chop64to32( sum + acc_word[j] ); + + else if ( inst == XVI8GER4SPP ) + result[j] = clampS64toS32(sum + acc_word[j]); } else { result[j] = 0; @@ -1821,7 +1824,7 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, else prod1 = exts16to64( srcA_word[i][1] ) * exts16to64( srcB_word[j][1] ); - /* sum is UInt so the result is choped to 32-bits */ + sum = prod0 + prod1; if ( inst == XVI16GER2 ) @@ -1830,13 +1833,11 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, else if ( inst == XVI16GER2S ) result[j] = clampS64toS32( sum ); - else if ( inst == XVI16GER2PP ) { + else if ( inst == XVI16GER2PP ) result[j] = chop64to32( sum + acc_word[j] ); - } - else if ( inst == XVI16GER2SPP ) { + else if ( inst == XVI16GER2SPP ) result[j] = clampS64toS32( sum + acc_word[j] ); - } } else { result[j] = 0; diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 20553a5394..e54f0f3898 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -5993,6 +5993,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi, break; case XVI4GER8PP: case XVI8GER4PP: + case XVI8GER4SPP: case XVI16GER2PP: case XVI16GER2SPP: case XVBF16GER2PP: @@ -34983,6 +34984,12 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, getVSReg( rA_addr ), getVSReg( rB_addr ), AT, ( ( inst_prefix << 8 ) | XO ) ); break; + case XVI8GER4SPP: + DIP("xvi8ger4spp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; case XVI16GER2S: DIP("xvi16ger2s %u,r%u, r%u\n", AT, rA_addr, rB_addr); vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, @@ -34995,6 +35002,19 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, getVSReg( rA_addr ), getVSReg( rB_addr ), AT, ( ( inst_prefix << 8 ) | XO ) ); break; + case XVI16GER2: + DIP("xvi16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVI16GER2PP: + DIP("xvi16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2: DIP("xvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, @@ -35193,6 +35213,39 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, AT, ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); break; + case XVI8GER4SPP: + PMSK = IFIELD( prefix, 12, 4); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi8ger4spp %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVI16GER2: + PMSK = IFIELD( prefix, 12, 4); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi16ger2 %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVI16GER2PP: + PMSK = IFIELD( prefix, 12, 4); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi16ger2pp %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; case XVI16GER2S: PMSK = IFIELD( prefix, 14, 2); XMSK = IFIELD( prefix, 4, 4); @@ -36345,6 +36398,9 @@ DisResult disInstr_PPC_WRK ( (opc2 == XVI4GER8PP) || // xvi4ger8pp (opc2 == XVI8GER4) || // xvi8ger4 (opc2 == XVI8GER4PP) || // xvi8ger4pp + (opc2 == XVI8GER4SPP) || // xvi8ger4spp + (opc2 == XVI16GER2) || // xvi16ger2 + (opc2 == XVI16GER2PP) || // xvi16ger2pp (opc2 == XVBF16GER2) || // xvbf16ger2 (opc2 == XVBF16GER2PP) || // xvbf16ger2pp (opc2 == XVBF16GER2PN) || // xvbf16ger2pn