From: Carl Love
Date: Fri, 25 Sep 2020 21:54:12 +0000 (-0500)
Subject: ISA 3.1 Reduced-Precision: Outer Product Operations
X-Git-Tag: VALGRIND_3_17_0~113
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=092e5620d40d54bc1ab6a77c895fc18b0c86c6a9;p=thirdparty%2Fvalgrind.git

ISA 3.1 Reduced-Precision: Outer Product Operations

Add support for:

pmxvf16ger2     Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update)
pmxvf16ger2nn   Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Negative multiply, Negative accumulate)
pmxvf16ger2np   Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Negative multiply, Positive accumulate)
pmxvf16ger2pn   Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Negative accumulate)
pmxvf16ger2pp   Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Positive accumulate)
pmxvf32ger      Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update)
pmxvf32gernn    Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate)
pmxvf32gernp    Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate)
pmxvf32gerpn    Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Negative accumulate)
pmxvf32gerpp    Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive accumulate)
pmxvf64ger      Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update)
pmxvf64gernn    Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate)
pmxvf64gernp    Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate)
pmxvf64gerpn    Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, Negative accumulate)
pmxvf64gerpp    Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive accumulate)
pmxvi16ger2s    Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation
pmxvi16ger2spp  Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation (Positive multiply, Positive accumulate)
pmxvi4ger8      Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update)
pmxvi4ger8pp    Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) (Positive multiply, Positive accumulate)
pmxvi8ger4      Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
pmxvi8ger4pp    Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) (Positive multiply, Positive accumulate)
xvf16ger2       VSX Vector 16-bit Floating-Point GER (rank-2 update)
xvf16ger2nn     VSX Vector 16-bit Floating-Point GER (rank-2 update) (Negative multiply, Negative accumulate)
xvf16ger2np     VSX Vector 16-bit Floating-Point GER (rank-2 update) (Negative multiply, Positive accumulate)
xvf16ger2pn     VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Negative accumulate)
xvf16ger2pp     VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Positive accumulate)
xvf32ger        VSX Vector 32-bit Floating-Point GER (rank-1 update)
xvf32gernn      VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate)
xvf32gernp      VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate)
xvf32gerpn      VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Negative accumulate)
xvf32gerpp      VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive accumulate)
xvf64ger        VSX Vector 64-bit Floating-Point GER (rank-1 update)
xvf64gernn      VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate)
xvf64gernp      VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate)
xvf64gerpn      VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, Negative accumulate)
xvf64gerpp      VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive accumulate)
xvi16ger2s      VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation
xvi16ger2spp    VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation (Positive multiply, Positive accumulate)
xvi4ger8        VSX Vector 4-bit Signed Integer GER (rank-8 update)
xvi4ger8pp      VSX Vector 4-bit Signed Integer GER (rank-8 update) (Positive multiply, Positive accumulate)
xvi8ger4        VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
xvi8ger4pp      VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) (Positive multiply, Positive accumulate)
xxmfacc         VSX Move From ACC
xxmtacc         VSX Move To ACC
xxsetaccz       VSX Set ACC to Zero

---

diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h index 2efa89e102..da228b9f04 100644 --- a/VEX/priv/guest_ppc_defs.h +++ b/VEX/priv/guest_ppc_defs.h @@ -160,7 +160,36 @@ extern ULong deposit_bits_under_mask_helper( ULong src, ULong mask ); extern ULong population_count64_helper( ULong src ); extern ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC, ULong IMM ); - +void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, + UInt reg, UInt *result); +void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, + UInt reg, UInt *result); + +/* 8-bit XO value from instruction description */ +#define XVI4GER8 0b00100011 +#define XVI4GER8PP 0b00100010 +#define XVI8GER4 0b00000011 +#define XVI8GER4PP 0b00000010 +#define XVI8GER4SPP 0b01100011 +#define XVI16GER2 0b01001011 +#define XVI16GER2PP 0b01101011 +#define XVI16GER2S 0b00101011 +#define XVI16GER2SPP 0b00101010 +#define XVF16GER2 0b00010011 +#define XVF16GER2PP 0b00010010 +#define XVF16GER2PN 0b10010010 +#define XVF16GER2NP 0b01010010 +#define XVF16GER2NN 0b11010010 +#define XVF32GER 0b00011011 +#define XVF32GERPP 0b00011010 +#define XVF32GERPN 0b10011010 +#define XVF32GERNP 0b01011010 +#define XVF32GERNN 0b11011010 +#define XVF64GER 0b00111011 +#define XVF64GERPP 0b00111010 +#define XVF64GERPN 0b10111010 +#define XVF64GERNP 0b01111010 +#define XVF64GERNN 0b11111010 /* --- DIRTY HELPERS --- */ @@ -179,6 +208,42 @@ extern void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst, UInt shift_right, UInt endness ); +extern void vsx_matrix_4bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt inst_mask ); +extern void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt inst_mask ); +extern void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt inst_mask ); +extern void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, + ULong srcA_lo, + ULong srcB_hi, + ULong srcB_lo, + UInt masks_inst ); +extern void vsx_matrix_32bit_float_ger_dirty_helper(
VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, + ULong srcA_lo, + ULong srcB_hi, + ULong srcB_lo, + UInt masks_inst ); +extern void vsx_matrix_64bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcX_hi, + ULong srcX_lo, + ULong srcY_hi, + ULong srcY_lo, + UInt masks_inst ); #endif /* ndef __VEX_GUEST_PPC_DEFS_H */ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c index 3bea3f9928..45dce63512 100644 --- a/VEX/priv/guest_ppc_helpers.c +++ b/VEX/priv/guest_ppc_helpers.c @@ -653,6 +653,814 @@ ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC, } + +/*------------------------------------------------*/ +/*---- VSX Matrix signed integer GER functions ---*/ +/*------------------------------------------------*/ +static UInt exts4( UInt src) +{ + /* Input is a 4-bit value. Extend bit 3 to bits [31:4] */ + if (( src >> 3 ) & 0x1) + return src | 0xFFFFFFF0; /* sign bit is a 1, extend */ + else + return src & 0xF; /* make sure high order bits are zero */ +} + +static UInt exts8( UInt src) +{ + /* Input is an 8-bit value. Extend bit 7 to bits [31:8] */ + if (( src >> 7 ) & 0x1) + return src | 0xFFFFFF00; /* sign bit is a 1, extend */ + else + return src & 0xFF; /* make sure high order bits are zero */ +} + +static UInt extz8( UInt src) +{ + /* Input is an 8-bit value. Extend src on the left with zeros. */ + return src & 0xFF; /* make sure high order bits are zero */ +} + +static ULong exts16to64( UInt src) +{ + /* Input is a 16-bit value. Extend bit 15 to bits [63:16] */ + if (( src >> 15 ) & 0x1) + return ((ULong) src) | 0xFFFFFFFFFFFF0000ULL; /* sign is 1, extend */ + else + /* make sure high order bits are zero */ + return ((ULong) src) & 0xFFFFULL; +} + +static UInt chop64to32( Long src ) { + /* Take a 64-bit input, return the lower 32-bits */ + return (UInt)(0xFFFFFFFF & src); +} + +static UInt clampS64toS32( Long src ) { + /* Take a 64-bit signed input, clamp positive values to 2^31-1 and + clamp negative values to -(2^31-1). Return the result in an + unsigned 32-bit value. */ + Long max_val = 2147483647; // 2^31-1 + if ( src > max_val) + return (UInt)max_val; + + if (src < -max_val) + return (UInt)-max_val; + + return (UInt)src; +} + +void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, UInt reg, + UInt *acc_word) +{ + U128* pU128_dst; + + vassert( (acc >= 0) && (acc < 8) ); + vassert( (reg >= 0) && (reg < 4) ); + + pU128_dst = (U128*) (((UChar*)gst) + offset + acc*4*sizeof(U128) + + reg*sizeof(U128)); + + /* The U128 type is defined as an array of unsigned integers. */ + (*pU128_dst)[0] = acc_word[0]; + (*pU128_dst)[1] = acc_word[1]; + (*pU128_dst)[2] = acc_word[2]; + (*pU128_dst)[3] = acc_word[3]; + return; +} + +void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, UInt reg, + UInt *acc_word) +{ + U128* pU128_src; + + acc_word[3] = 0xDEAD; + acc_word[2] = 0xBEEF; + acc_word[1] = 0xBAD; + acc_word[0] = 0xBEEF; + + vassert( (acc >= 0) && (acc < 8) ); + vassert( (reg >= 0) && (reg < 4) ); + + pU128_src = (U128*) (((UChar*)gst) + offset + acc*4*sizeof(U128) + + reg*sizeof(U128)); + + /* The U128 type is defined as an array of unsigned integers.
*/ + acc_word[0] = (*pU128_src)[0]; + acc_word[1] = (*pU128_src)[1]; + acc_word[2] = (*pU128_src)[2]; + acc_word[3] = (*pU128_src)[3]; + return; +} + +void vsx_matrix_4bit_ger_dirty_helper ( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) +{ + /* This helper calculates the result for one ACC entry. All four + 128-bit rows of the selected accumulator are computed and written + back. */ + UInt i, j, mask, sum, inst, acc_entry, prefix_inst; + + UInt srcA_nibbles[4][8]; /* word, nibble */ + UInt srcB_nibbles[4][8]; /* word, nibble */ + UInt acc_word[4]; + UInt prod0, prod1, prod2, prod3, prod4, prod5, prod6, prod7; + UInt result[4]; + UInt pmsk = 0; + UInt xmsk = 0; + UInt ymsk = 0; + + mask = 0xF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = (masks_inst >> 13) & 0x1; + acc_entry = masks_inst & 0xF; + + /* LE word numbering */ + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + pmsk = 0b11111111; + xmsk = 0b1111; + ymsk = 0b1111; + + } else { + pmsk = (masks_inst >> 22) & 0xFF; + xmsk = (masks_inst >> 18) & 0xF; + ymsk = (masks_inst >> 14) & 0xF; + } + + /* Address nibbles using IBM numbering */ + for( i = 0; i < 4; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word); + + // input is in double words + for( j = 0; j< 8; j++) { + srcA_nibbles[3][j] = (srcA_hi >> (60-4*j)) & mask; // hi bits [63:32] + srcA_nibbles[2][j] = (srcA_hi >> (28-4*j)) & mask; // hi bits [31:0] + srcA_nibbles[1][j] = (srcA_lo >> (60-4*j)) & mask; // lo bits [63:32] + srcA_nibbles[0][j] = (srcA_lo >> (28-4*j)) & mask; // lo bits [31:0] + + srcB_nibbles[3][j] = (srcB_hi >> (60-4*j)) & mask; + srcB_nibbles[2][j] = (srcB_hi >> (28-4*j)) & mask; + srcB_nibbles[1][j] = (srcB_lo >> (60-4*j)) & mask; + srcB_nibbles[0][j] = (srcB_lo >> (28-4*j)) & mask; + } + + for( j = 0; j < 4; j++) { + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + if (((pmsk >> 7) & 0x1) == 0) + prod0 = 0; + else + prod0 = exts4( srcA_nibbles[i][0] ) + * exts4( srcB_nibbles[j][0] ); + + if (((pmsk >> 6) & 0x1) == 0) + prod1 = 0; + else + prod1 = exts4( srcA_nibbles[i][1] ) + * exts4( srcB_nibbles[j][1] ); + + if (((pmsk >> 5) & 0x1) == 0) + prod2 = 0; + else + prod2 = exts4( srcA_nibbles[i][2] ) + * exts4( srcB_nibbles[j][2] ); + + if (((pmsk >> 4) & 0x1) == 0) + prod3 = 0; + else + prod3 = exts4( srcA_nibbles[i][3] ) + * exts4( srcB_nibbles[j][3] ); + + if (((pmsk >> 3) & 0x1) == 0) + prod4 = 0; + else + prod4 = exts4( srcA_nibbles[i][4] ) + * exts4( srcB_nibbles[j][4] ); + + if (((pmsk >> 2) & 0x1) == 0) + prod5 = 0; + else + prod5 = exts4( srcA_nibbles[i][5] ) + * exts4( srcB_nibbles[j][5] ); + + if (((pmsk >> 1) & 0x1) == 0) + prod6 = 0; + else + prod6 = exts4( srcA_nibbles[i][6] ) + * exts4( srcB_nibbles[j][6] ); + + if ((pmsk & 0x1) == 0) + prod7 = 0; + else + prod7 = exts4( srcA_nibbles[i][7] ) + * exts4( srcB_nibbles[j][7] ); + /* sum is UInt so the result is chopped to 32 bits */ + sum = prod0 + prod1 + prod2 + prod3 + prod4 + + prod5 + prod6 + prod7; + + if ( inst == XVI4GER8 ) + result[j] = sum; + + else if ( inst == XVI4GER8PP ) + result[j] = sum + acc_word[j]; + + } else { + result[j] = 0; + } + } + write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result); + } +} + +void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) +{ + UInt i, j,
mask, sum, inst, acc_entry, prefix_inst; + + UInt srcA_bytes[4][4]; /* word, byte */ + UInt srcB_bytes[4][4]; /* word, byte */ + UInt acc_word[4]; + UInt prod0, prod1, prod2, prod3; + UInt result[4]; + UInt pmsk = 0; + UInt xmsk = 0; + UInt ymsk = 0; + + mask = 0xFF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = (masks_inst >> 13) & 0x1; + acc_entry = masks_inst & 0xF; + + /* LE word numbering */ + if ( prefix_inst == 0 ) { + /* Set the masks */ + pmsk = 0b1111; + xmsk = 0b1111; + ymsk = 0b1111; + + } else { + pmsk = (masks_inst >> 26) & 0xF; + xmsk = (masks_inst >> 18) & 0xF; + ymsk = (masks_inst >> 14) & 0xF; + } + + /* Address bytes using IBM numbering */ + for( i = 0; i < 4; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word); + + for( j = 0; j< 4; j++) { + srcA_bytes[3][j] = (srcA_hi >> (56-8*j)) & mask; + srcA_bytes[2][j] = (srcA_hi >> (24-8*j)) & mask; + srcA_bytes[1][j] = (srcA_lo >> (56-8*j)) & mask; + srcA_bytes[0][j] = (srcA_lo >> (24-8*j)) & mask; + + srcB_bytes[3][j] = (srcB_hi >> (56-8*j)) & mask; + srcB_bytes[2][j] = (srcB_hi >> (24-8*j)) & mask; + srcB_bytes[1][j] = (srcB_lo >> (56-8*j)) & mask; + srcB_bytes[0][j] = (srcB_lo >> (24-8*j)) & mask; + } + + for( j = 0; j < 4; j++) { + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + if (((pmsk >> 3) & 0x1) == 0) + prod0 = 0; + else + prod0 = + exts8( srcA_bytes[i][0] ) + * extz8( srcB_bytes[j][0] ); + + if (((pmsk >> 2) & 0x1) == 0) + prod1 = 0; + else + prod1 = + exts8( srcA_bytes[i][1] ) + * extz8( srcB_bytes[j][1] ); + + if (((pmsk >> 1) & 0x1) == 0) + prod2 = 0; + else + prod2 = + exts8( srcA_bytes[i][2] ) + * extz8( srcB_bytes[j][2] ); + + if (((pmsk >> 0) & 0x1) == 0) + prod3 = 0; + else + prod3 = + exts8( srcA_bytes[i][3] ) + * extz8( srcB_bytes[j][3] ); + + /* sum is UInt so the result is chopped to 32 bits */ + sum = prod0 + prod1 + prod2 + prod3; + + if ( inst == XVI8GER4 ) + result[j] = sum; + + else if ( inst == XVI8GER4PP ) + result[j] = sum + acc_word[j]; + + } else { + result[j] = 0; + } + } + write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result); + } +} + +void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) +{ + UInt i, j, mask, inst, acc_entry, prefix_inst; + ULong sum; + UInt srcA_word[4][2]; /* word, hword */ + UInt srcB_word[4][2]; /* word, hword */ + UInt acc_word[4]; + ULong prod0, prod1; + UInt result[4]; + UInt pmsk = 0; + UInt xmsk = 0; + UInt ymsk = 0; + + mask = 0xFFFF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = (masks_inst >> 13) & 0x1; + acc_entry = masks_inst & 0xF; + + /* LE word numbering */ + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + pmsk = 0b11; + xmsk = 0b1111; + ymsk = 0b1111; + + } else { + pmsk = (masks_inst >> 28) & 0x3; + xmsk = (masks_inst >> 18) & 0xF; + ymsk = (masks_inst >> 14) & 0xF; + } + + /* Address half-words using IBM numbering */ + for( i = 0; i < 4; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word); + + for( j = 0; j< 2; j++) { + srcA_word[3][j] = (srcA_hi >> (48-16*j)) & mask; + srcA_word[2][j] = (srcA_hi >> (16-16*j)) & mask; + srcA_word[1][j] = (srcA_lo >> (48-16*j)) & mask; + srcA_word[0][j] = (srcA_lo >> (16-16*j)) & mask; + + srcB_word[3][j] = (srcB_hi >> (48-16*j)) & mask; + srcB_word[2][j] = (srcB_hi >> (16-16*j)) & mask; + srcB_word[1][j] = (srcB_lo >>
(48-16*j)) & mask; + srcB_word[0][j] = (srcB_lo >> (16-16*j)) & mask; + } + + for( j = 0; j < 4; j++) { + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + if (((pmsk >> 1) & 0x1) == 0) + prod0 = 0; + + else + prod0 = exts16to64( srcA_word[i][0] ) + * exts16to64( srcB_word[j][0] ); + + if (((pmsk >> 0) & 0x1) == 0) + prod1 = 0; + else + prod1 = exts16to64( srcA_word[i][1] ) + * exts16to64( srcB_word[j][1] ); + /* sum is 64-bit; the result is chopped or clamped to 32 bits + below */ + sum = prod0 + prod1; + + if ( inst == XVI16GER2 ) + result[j] = chop64to32( sum ); + + else if ( inst == XVI16GER2S ) + result[j] = clampS64toS32( sum ); + + else if ( inst == XVI16GER2PP ) { + result[j] = chop64to32( sum + acc_word[j] ); + } + + else if ( inst == XVI16GER2SPP ) { + result[j] = clampS64toS32( sum + acc_word[j] ); + } + + } else { + result[j] = 0; + } + } + write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result); + } +} + +//matrix 16 float stuff +union +convert_t { + UInt u32; + ULong u64; + Float f; + Double d; +}; + +static Float reinterpret_int_as_float( UInt input ) +{ + /* Reinterpret the bit pattern of an int as a float. */ + __attribute__ ((aligned (128))) union convert_t conv; + + conv.u32 = input; + return conv.f; +} + +static UInt reinterpret_float_as_int( Float input ) +{ + /* Reinterpret the bit pattern of a float as an int. */ + __attribute__ ((aligned (128))) union convert_t conv; + + conv.f = input; + return conv.u32; +} + +static Double reinterpret_long_as_double( ULong input ) +{ + /* Reinterpret the bit pattern of a long as a double. */ + __attribute__ ((aligned (128))) union convert_t conv; + + conv.u64 = input; + return conv.d; +} + +static ULong reinterpret_double_as_long( Double input ) +{ + /* Reinterpret the bit pattern of a double as a long. */ + __attribute__ ((aligned (128))) union convert_t conv; + + conv.d = input; + return conv.u64; +} + +static Double conv_f16_to_double( ULong input ) +{ + // This all seems to be very alignment sensitive?? + __attribute__ ((aligned (64))) ULong src; + __attribute__ ((aligned (64))) Double result; + src = input; + __asm__ __volatile__ ("xscvhpdp %x0,%x1" : "=wa" (result) : "wa" (src)); + return result; +} + + +static Float conv_double_to_float( Double src ) +{ + return (float) src ; +} + + +static Double negate_double( Double input ) +{ + /* Don't negate a NaN value. A NaN has an exponent + of all 1's and a non-zero fraction. */ + __attribute__ ((aligned (128))) union convert_t conv; + + conv.d = input; + + if ( ( ( conv.u64 & I64_EXP_MASK) == I64_EXP_MASK ) + && ( ( conv.u64 & I64_FRACTION_MASK ) != 0 ) ) + return input; + else + return -input; +} + +static Float negate_float( Float input ) +{ + /* Don't negate a NaN value. A NaN has an exponent + of all 1's and a non-zero fraction.
*/ + __attribute__ ((aligned (128))) union convert_t conv; + + conv.f = input; + + if ( ( ( conv.u32 & I32_EXP_MASK) == I32_EXP_MASK ) + && ( ( conv.u32 & I32_FRACTION_MASK ) != 0 ) ) + return input; + else + return -input; +} + +void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) +{ + UInt i, j, mask, inst, acc_entry, prefix_inst; + + UInt srcA_word[4][2]; /* word, hword */ + UInt srcB_word[4][2]; /* word, hword */ + Double src10, src11, src20, src21; + UInt acc_word_input[4]; + Float acc_word[4]; + Double prod; + Double msum; + UInt result[4]; + UInt pmsk = 0; + UInt xmsk = 0; + UInt ymsk = 0; + + mask = 0xFFFF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = (masks_inst >> 13) & 0x1; + acc_entry = masks_inst & 0xF; + + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + pmsk = 0b11; + xmsk = 0b1111; + ymsk = 0b1111; + + } else { + /* Use mask supplied with prefix inst */ + pmsk = (masks_inst >> 28) & 0x3; + xmsk = (masks_inst >> 18) & 0xF; + ymsk = (masks_inst >> 14) & 0xF; + } + + /* Address half-words using IBM numbering */ + for( i = 0; i < 4; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word_input); + + acc_word[3] = reinterpret_int_as_float( acc_word_input[3] ); + acc_word[2] = reinterpret_int_as_float( acc_word_input[2] ); + acc_word[1] = reinterpret_int_as_float( acc_word_input[1] ); + acc_word[0] = reinterpret_int_as_float( acc_word_input[0] ); + + for( j = 0; j < 2; j++) { // input is in double words + srcA_word[3][j] = (UInt)((srcA_hi >> (48-16*j)) & mask); + srcA_word[2][j] = (UInt)((srcA_hi >> (16-16*j)) & mask); + srcA_word[1][j] = (UInt)((srcA_lo >> (48-16*j)) & mask); + srcA_word[0][j] = (UInt)((srcA_lo >> (16-16*j)) & mask); + + srcB_word[3][j] = (UInt)((srcB_hi >> (48-16*j)) & mask); + srcB_word[2][j] = (UInt)((srcB_hi >> (16-16*j)) & mask); + srcB_word[1][j] = (UInt)((srcB_lo >> (48-16*j)) & mask); + srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask); + } + + for( j = 0; j < 4; j++) { + if (((pmsk >> 1) & 0x1) == 0) { + src10 = 0; + src20 = 0; + } else { + src10 = conv_f16_to_double((ULong)srcA_word[i][0]); + src20 = conv_f16_to_double((ULong)srcB_word[j][0]); + } + + if ((pmsk & 0x1) == 0) { + src11 = 0; + src21 = 0; + } else { + src11 = conv_f16_to_double((ULong)srcA_word[i][1]); + src21 = conv_f16_to_double((ULong)srcB_word[j][1]); + } + + + prod = src10 * src20; + msum = prod + src11 * src21; + + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + /* Note, we do not track the exception handling bits + ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. 
*/ + + if ( inst == XVF16GER2 ) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) ); + + else if ( inst == XVF16GER2PP ) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) + + acc_word[j] ); + + else if ( inst == XVF16GER2PN ) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) + + negate_float( acc_word[j] ) ); + + else if ( inst == XVF16GER2NP ) + result[j] = reinterpret_float_as_int( + conv_double_to_float( negate_double( msum ) ) + + acc_word[j] ); + + else if ( inst == XVF16GER2NN ) + result[j] = reinterpret_float_as_int( + conv_double_to_float( negate_double( msum ) ) + + negate_float( acc_word[j] ) ); + } else { + result[j] = 0; + } + } + write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result); + } +} + +void vsx_matrix_32bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) +{ + UInt i, j, mask, inst, acc_entry, prefix_inst; + + Float srcA_word[4]; + Float srcB_word[4]; + UInt acc_word_input[4]; + Float acc_word[4]; + UInt result[4]; + UInt xmsk = 0; + UInt ymsk = 0; + Float src1, src2, acc; + + mask = 0xFFFFFFFF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = (masks_inst >> 13) & 0x1; + acc_entry = masks_inst & 0xF; + + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + xmsk = 0b1111; + ymsk = 0b1111; + + } else { + xmsk = (masks_inst >> 18) & 0xF; + ymsk = (masks_inst >> 14) & 0xF; + } + + srcA_word[3] = reinterpret_int_as_float( (srcA_hi >> 32) & mask ); + srcA_word[2] = reinterpret_int_as_float( srcA_hi & mask ); + srcA_word[1] = reinterpret_int_as_float( (srcA_lo >> 32) & mask ); + srcA_word[0] = reinterpret_int_as_float( srcA_lo & mask ); + + srcB_word[3] = reinterpret_int_as_float( (srcB_hi >> 32) & mask ); + srcB_word[2] = reinterpret_int_as_float( srcB_hi & mask ); + srcB_word[1] = reinterpret_int_as_float( (srcB_lo >> 32) & mask ); + srcB_word[0] = reinterpret_int_as_float( srcB_lo & mask ); + + /* Address bytes using IBM numbering */ + for( i = 0; i < 4; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word_input); + + acc_word[3] = reinterpret_int_as_float( acc_word_input[3] ); + acc_word[2] = reinterpret_int_as_float( acc_word_input[2] ); + acc_word[1] = reinterpret_int_as_float( acc_word_input[1] ); + acc_word[0] = reinterpret_int_as_float( acc_word_input[0] ); + + for( j = 0; j < 4; j++) { + + if ((((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) == 0x1) { + /* Note, we do not track the exception handling bits + ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR.
*/ + + src1 = srcA_word[i]; + src2 = srcB_word[j]; + acc = acc_word[j]; + + if ( inst == XVF32GER ) + result[j] = reinterpret_float_as_int( src1 * src2 ); + + else if ( inst == XVF32GERPP ) + result[j] = reinterpret_float_as_int( ( src1 * src2 ) + acc ); + + else if ( inst == XVF32GERPN ) + result[j] = reinterpret_float_as_int( ( src1 * src2 ) + + negate_float( acc ) ); + + else if ( inst == XVF32GERNP ) + result[j] = reinterpret_float_as_int( + negate_float( src1 * src2 ) + acc ); + + else if ( inst == XVF32GERNN ) + result[j] = reinterpret_float_as_int( + negate_float( src1 * src2 ) + negate_float( acc ) ); + } else { + result[j] = 0; + } + } + write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result); + } +} + +void vsx_matrix_64bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcX_hi, ULong srcX_lo, + ULong srcY_hi, ULong srcY_lo, + UInt masks_inst ) +{ + /* This function just computes the result for one entry in the ACC. */ + UInt i, j, inst, acc_entry, prefix_inst; + + Double srcX_dword[4]; + Double srcY_dword[2]; + Double result[2]; + UInt result_uint[4]; + ULong result_ulong[2]; + Double acc_dword[4]; + ULong acc_word_ulong[2]; + UInt acc_word_input[4]; + UInt xmsk = 0; + UInt ymsk = 0; + UInt start_i; + Double src1, src2, acc; + + inst = (masks_inst >> 8) & 0xFF; + prefix_inst = (masks_inst >> 16) & 0x1; + start_i = (masks_inst >> 4) & 0xF; + acc_entry = masks_inst & 0xF; + + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + xmsk = 0b1111; + ymsk = 0b11; + + } else { + xmsk = (masks_inst >> 21) & 0xF; + ymsk = (masks_inst >> 19) & 0x3; + } + + /* Need to store the srcX_dword in the correct index for the following + for loop. */ + srcX_dword[1+start_i] = reinterpret_long_as_double( srcX_lo); + srcX_dword[0+start_i] = reinterpret_long_as_double( srcX_hi ); + srcY_dword[1] = reinterpret_long_as_double( srcY_lo ); + srcY_dword[0] = reinterpret_long_as_double( srcY_hi ); + + for( i = start_i; i < start_i+2; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3 - i, + acc_word_input); + + acc_word_ulong[1] = acc_word_input[3]; + acc_word_ulong[1] = (acc_word_ulong[1] << 32) | acc_word_input[2]; + acc_word_ulong[0] = acc_word_input[1]; + acc_word_ulong[0] = (acc_word_ulong[0] << 32) | acc_word_input[0]; + acc_dword[0] = reinterpret_long_as_double( acc_word_ulong[0] ); + acc_dword[1] = reinterpret_long_as_double( acc_word_ulong[1]); + + for( j = 0; j < 2; j++) { + + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + /* Note, we do not track the exception handling bits + ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */ + + src1 = srcX_dword[i]; + src2 = srcY_dword[j]; + acc = acc_dword[j]; + + if ( inst == XVF64GER ) + result[j] = src1 * src2; + + else if ( inst == XVF64GERPP ) + result[j] = ( src1 * src2 ) + acc; + + else if ( inst == XVF64GERPN ) + result[j] = ( src1 * src2 ) + negate_double( acc ); + + else if ( inst == XVF64GERNP ) + result[j] = negate_double( src1 * src2 ) + acc; + + else if ( inst == XVF64GERNN ) + result[j] = negate_double( src1 * src2 ) + negate_double( acc ); + + } else { + result[j] = 0; + } + } + + /* Need to store the two double float values as two unsigned ints in + order to store them to the ACC. 
*/ + result_ulong[0] = reinterpret_double_as_long ( result[0] ); + result_ulong[1] = reinterpret_double_as_long ( result[1] ); + + result_uint[0] = result_ulong[0] & 0xFFFFFFFF; + result_uint[1] = (result_ulong[0] >> 32) & 0xFFFFFFFF; + result_uint[2] = result_ulong[1] & 0xFFFFFFFF; + result_uint[3] = (result_ulong[1] >> 32) & 0xFFFFFFFF; + + write_ACC_entry (gst, offset_ACC, acc_entry, 3 - i, + result_uint); + } +} + /*----------------------------------------------*/ /*--- The exported fns .. ---*/ /*----------------------------------------------*/ @@ -915,6 +1723,39 @@ void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) VECZERO(vex_state->guest_VSR62); VECZERO(vex_state->guest_VSR63); + VECZERO( vex_state->guest_ACC_0_r0 ); + VECZERO( vex_state->guest_ACC_0_r1 ); + VECZERO( vex_state->guest_ACC_0_r2 ); + VECZERO( vex_state->guest_ACC_0_r3 ); + VECZERO( vex_state->guest_ACC_1_r0 ); + VECZERO( vex_state->guest_ACC_1_r1 ); + VECZERO( vex_state->guest_ACC_1_r2 ); + VECZERO( vex_state->guest_ACC_1_r3 ); + VECZERO( vex_state->guest_ACC_2_r0 ); + VECZERO( vex_state->guest_ACC_2_r1 ); + VECZERO( vex_state->guest_ACC_2_r2 ); + VECZERO( vex_state->guest_ACC_2_r3 ); + VECZERO( vex_state->guest_ACC_3_r0 ); + VECZERO( vex_state->guest_ACC_3_r1 ); + VECZERO( vex_state->guest_ACC_3_r2 ); + VECZERO( vex_state->guest_ACC_3_r3 ); + VECZERO( vex_state->guest_ACC_4_r0 ); + VECZERO( vex_state->guest_ACC_4_r1 ); + VECZERO( vex_state->guest_ACC_4_r2 ); + VECZERO( vex_state->guest_ACC_4_r3 ); + VECZERO( vex_state->guest_ACC_5_r0 ); + VECZERO( vex_state->guest_ACC_5_r1 ); + VECZERO( vex_state->guest_ACC_5_r2 ); + VECZERO( vex_state->guest_ACC_5_r3 ); + VECZERO( vex_state->guest_ACC_6_r0 ); + VECZERO( vex_state->guest_ACC_6_r1 ); + VECZERO( vex_state->guest_ACC_6_r2 ); + VECZERO( vex_state->guest_ACC_6_r3 ); + VECZERO( vex_state->guest_ACC_7_r0 ); + VECZERO( vex_state->guest_ACC_7_r1 ); + VECZERO( vex_state->guest_ACC_7_r2 ); + VECZERO( vex_state->guest_ACC_7_r3 ); + # undef VECZERO vex_state->guest_CIA = 0; @@ -1160,6 +2001,7 @@ void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state ) vex_state->guest_PPR = 0x4ULL << 50; // medium priority vex_state->guest_PSPB = 0x100; // an arbitrary non-zero value to start with vex_state->guest_DSCR = 0; + } diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 9192436924..c9c058a7ab 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -343,6 +343,38 @@ static Bool OV32_CA32_supported = False; #define OFFB_PPR offsetofPPCGuestState(guest_PPR) #define OFFB_PSPB offsetofPPCGuestState(guest_PSPB) #define OFFB_DSCR offsetofPPCGuestState(guest_DSCR) +#define OFFB_ACC_0_r0 offsetofPPCGuestState(guest_ACC_0_r0) +#define OFFB_ACC_0_r1 offsetofPPCGuestState(guest_ACC_0_r1) +#define OFFB_ACC_0_r2 offsetofPPCGuestState(guest_ACC_0_r2) +#define OFFB_ACC_0_r3 offsetofPPCGuestState(guest_ACC_0_r3) +#define OFFB_ACC_1_r0 offsetofPPCGuestState(guest_ACC_1_r0) +#define OFFB_ACC_1_r1 offsetofPPCGuestState(guest_ACC_1_r1) +#define OFFB_ACC_1_r2 offsetofPPCGuestState(guest_ACC_1_r2) +#define OFFB_ACC_1_r3 offsetofPPCGuestState(guest_ACC_1_r3) +#define OFFB_ACC_2_r0 offsetofPPCGuestState(guest_ACC_2_r0) +#define OFFB_ACC_2_r1 offsetofPPCGuestState(guest_ACC_2_r1) +#define OFFB_ACC_2_r2 offsetofPPCGuestState(guest_ACC_2_r2) +#define OFFB_ACC_2_r3 offsetofPPCGuestState(guest_ACC_2_r3) +#define OFFB_ACC_3_r0 offsetofPPCGuestState(guest_ACC_3_r0) +#define OFFB_ACC_3_r1 offsetofPPCGuestState(guest_ACC_3_r1) +#define 
OFFB_ACC_3_r2 offsetofPPCGuestState(guest_ACC_3_r2) +#define OFFB_ACC_3_r3 offsetofPPCGuestState(guest_ACC_3_r3) +#define OFFB_ACC_4_r0 offsetofPPCGuestState(guest_ACC_4_r0) +#define OFFB_ACC_4_r1 offsetofPPCGuestState(guest_ACC_4_r1) +#define OFFB_ACC_4_r2 offsetofPPCGuestState(guest_ACC_4_r2) +#define OFFB_ACC_4_r3 offsetofPPCGuestState(guest_ACC_4_r3) +#define OFFB_ACC_5_r0 offsetofPPCGuestState(guest_ACC_5_r0) +#define OFFB_ACC_5_r1 offsetofPPCGuestState(guest_ACC_5_r1) +#define OFFB_ACC_5_r2 offsetofPPCGuestState(guest_ACC_5_r2) +#define OFFB_ACC_5_r3 offsetofPPCGuestState(guest_ACC_5_r3) +#define OFFB_ACC_6_r0 offsetofPPCGuestState(guest_ACC_6_r0) +#define OFFB_ACC_6_r1 offsetofPPCGuestState(guest_ACC_6_r1) +#define OFFB_ACC_6_r2 offsetofPPCGuestState(guest_ACC_6_r2) +#define OFFB_ACC_6_r3 offsetofPPCGuestState(guest_ACC_6_r3) +#define OFFB_ACC_7_r0 offsetofPPCGuestState(guest_ACC_7_r0) +#define OFFB_ACC_7_r1 offsetofPPCGuestState(guest_ACC_7_r1) +#define OFFB_ACC_7_r2 offsetofPPCGuestState(guest_ACC_7_r2) +#define OFFB_ACC_7_r3 offsetofPPCGuestState(guest_ACC_7_r3) /*------------------------------------------------------------*/ @@ -495,6 +527,11 @@ static inline UChar ifieldSHW ( UInt instr ) return ifieldDM ( instr ); } +/* Extract AT field from theInstr 8LS:D form */ +static UChar ifieldAT ( UInt instr ) { + return toUChar( IFIELD( instr, 23, 3 ) ); +} + /*------------------------------------------------------------*/ /*--- Guest-state identifiers ---*/ /*------------------------------------------------------------*/ @@ -526,6 +563,40 @@ typedef enum { * needed. */ PPC_GST_DSCR, // Data Stream Control Register + PPC_GST_ACC_0_r0, /* Accumulator register file. Eight accumulators each + * with four 128-bit registers. + */ + PPC_GST_ACC_0_r1, + PPC_GST_ACC_0_r2, + PPC_GST_ACC_0_r3, + PPC_GST_ACC_1_r0, + PPC_GST_ACC_1_r1, + PPC_GST_ACC_1_r2, + PPC_GST_ACC_1_r3, + PPC_GST_ACC_2_r0, + PPC_GST_ACC_2_r1, + PPC_GST_ACC_2_r2, + PPC_GST_ACC_2_r3, + PPC_GST_ACC_3_r0, + PPC_GST_ACC_3_r1, + PPC_GST_ACC_3_r2, + PPC_GST_ACC_3_r3, + PPC_GST_ACC_4_r0, + PPC_GST_ACC_4_r1, + PPC_GST_ACC_4_r2, + PPC_GST_ACC_4_r3, + PPC_GST_ACC_5_r0, + PPC_GST_ACC_5_r1, + PPC_GST_ACC_5_r2, + PPC_GST_ACC_5_r3, + PPC_GST_ACC_6_r0, + PPC_GST_ACC_6_r1, + PPC_GST_ACC_6_r2, + PPC_GST_ACC_6_r3, + PPC_GST_ACC_7_r0, + PPC_GST_ACC_7_r1, + PPC_GST_ACC_7_r2, + PPC_GST_ACC_7_r3, PPC_GST_MAX } PPC_GST; @@ -3994,6 +4065,264 @@ static IRExpr* /* ::Ity_I32 */ getFPCC ( void ) return mkexpr(val); } +/*-----------------------------------------------------------*/ +/* Helpers to access VSX Accumulator register file + *-----------------------------------------------------------*/ +static void putACC( UInt index, UInt reg, IRExpr* src ) +{ + switch (index) { + case 0: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_0_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_0_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_0_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_0_r3, src ) ); + break; + } + break; + + case 1: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_1_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_1_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_1_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_1_r3, src ) ); + break; + } + break; + + case 2: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_2_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_2_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_2_r2, src ) ); + 
break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_2_r3, src ) ); + break; + } + break; + + case 3: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_3_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_3_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_3_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_3_r3, src ) ); + break; + } + break; + + case 4: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_4_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_4_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_4_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_4_r3, src ) ); + break; + } + break; + + case 5: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_5_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_5_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_5_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_5_r3, src ) ); + break; + } + break; + + case 6: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_6_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_6_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_6_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_6_r3, src ) ); + break; + } + break; + + case 7: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_7_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_7_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_7_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_7_r3, src ) ); + break; + } + break; + } +} + +static IRExpr* /* :: Ity_V128 */ getACC ( UInt index, UInt reg ) +{ + vassert( (index >= 0) && (index < 8) ); + vassert( (reg >= 0) && (reg < 4) ); + // vex_printf("getACC (%d, %d)) \n", index, reg); + switch (index) { + case 0: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_0_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_0_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_0_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_0_r3, Ity_V128 ); + } + break; + + case 1: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_1_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_1_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_1_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_1_r3, Ity_V128 ); + } + break; + + case 2: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_2_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_2_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_2_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_2_r3, Ity_V128 ); + } + break; + + case 3: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_3_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_3_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_3_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_3_r3, Ity_V128 ); + } + break; + + case 4: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_4_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_4_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_4_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_4_r3, Ity_V128 ); + } + break; + + case 5: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_5_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_5_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_5_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_5_r3, Ity_V128 ); + } + break; + + case 6: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_6_r0, Ity_V128 ); + case 1: + return IRExpr_Get( 
OFFB_ACC_6_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_6_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_6_r3, Ity_V128 ); + } + break; + + case 7: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_7_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_7_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_7_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_7_r3, Ity_V128 ); + } + break; + } + return 0; // error +} + + /*------------------------------------------------------------*/ /* Helpers for VSX instructions that do floating point * operations and need to determine if a src contains a @@ -5438,35 +5767,23 @@ static IRExpr * vector_evaluate_inst ( const VexAbiInfo* vbi, IRExpr *srcA, IRExpr *srcB, IRExpr *srcC, IRExpr *IMM ){ /* This function implements the ISA 3.1 instruction xxeval. The - instruction is too complex to do with Iops. An Iop implementation is - expected to exhaust memory and be really complex to write, debug and - understand. The second option would be to just map it to a new Iop. - Unfortunately, I doubt any other architecture will implement it making - the Iop PPC specific which isn't really attractive. It would need - extensive documenation for the Iop definition for anyone else to - understand what it does. That leaves doing it as a clean helper. This - is not the ideal option, but was chosen for now to help document what - the instruction does. Discuss this with Julian before committing to - decide if we really want to use this approach or map the instructioin - to a new IOP. */ - /* FIX ME, CARLL 11/8/2018*/ + instruction is too complex to do with Iops. */ /* The instruction description, note the IBM bit numbering is left to right: - For each integer value i, 0 to 127, do the following. + For each integer value i, 0 to 127, do the following. - Let j be the value of the concatenation of the contents of bit i of - srcA, bit i of srcB, bit i of srcC. (j = srcA[i] | srcB[i] | srcC[i]) + Let j be the value of the concatenation of the contents of bit i of + srcA, bit i of srcB, bit i of srcC. (j = srcA[i] | srcB[i] | srcC[i]) - The value of bit IMM[j] is placed into bit result[i]. + The value of bit IMM[j] is placed into bit result[i]. Basically the instruction lets you set each of the 128 bits in the result by selecting one of the eight bits in the IMM value. */ - /* Calling clean helpers with 128-bit args is currently not supported. It - isn't worth adding the support. We will simply call a 64-bit helper to - do the upper 64-bits of the result and the lower 64-bits of the result. - */ + /* Calling clean helpers with 128-bit args is currently not supported. We + will simply call a 64-bit clean helper to do the upper 64-bits of the + result and then call it to do the lower 64-bits of the result. */ IRTemp result_hi = newTemp( Ity_I64 ); IRTemp result_lo = newTemp( Ity_I64 ); @@ -5501,6 +5818,295 @@ static IRExpr * vector_evaluate_inst ( const VexAbiInfo* vbi, return binop( Iop_64HLtoV128, mkexpr( result_hi ), mkexpr( result_lo ) ); } +static void setup_fxstate_struct( IRDirty* d, UInt AT, IREffect AT_fx ) { + /* declare guest state effects, writing to four ACC 128-bit regs.
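Declaring these effects matters: fxState is how the IR optimizer and tools such as Memcheck learn which guest-state bytes a dirty call reads or writes, so an ACC row left out of fxState would be invisible to them.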
*/ + d->nFxState = 4; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = AT_fx; + d->fxState[0].size = sizeof(U128); + d->fxState[1].fx = AT_fx; + d->fxState[1].size = sizeof(U128); + d->fxState[2].fx = AT_fx; + d->fxState[2].size = sizeof(U128); + d->fxState[3].fx = AT_fx; + d->fxState[3].size = sizeof(U128); + + switch (AT) { + case 0: + d->fxState[0].offset = OFFB_ACC_0_r0; + d->fxState[1].offset = OFFB_ACC_0_r1; + d->fxState[2].offset = OFFB_ACC_0_r2; + d->fxState[3].offset = OFFB_ACC_0_r3; + break; + case 1: + d->fxState[0].offset = OFFB_ACC_1_r0; + d->fxState[1].offset = OFFB_ACC_1_r1; + d->fxState[2].offset = OFFB_ACC_1_r2; + d->fxState[3].offset = OFFB_ACC_1_r3; + break; + case 2: + d->fxState[0].offset = OFFB_ACC_2_r0; + d->fxState[1].offset = OFFB_ACC_2_r1; + d->fxState[2].offset = OFFB_ACC_2_r2; + d->fxState[3].offset = OFFB_ACC_2_r3; + break; + case 3: + d->fxState[0].offset = OFFB_ACC_3_r0; + d->fxState[1].offset = OFFB_ACC_3_r1; + d->fxState[2].offset = OFFB_ACC_3_r2; + d->fxState[3].offset = OFFB_ACC_3_r3; + break; + case 4: + d->fxState[0].offset = OFFB_ACC_4_r0; + d->fxState[1].offset = OFFB_ACC_4_r1; + d->fxState[2].offset = OFFB_ACC_4_r2; + d->fxState[3].offset = OFFB_ACC_4_r3; + break; + case 5: + d->fxState[0].offset = OFFB_ACC_5_r0; + d->fxState[1].offset = OFFB_ACC_5_r1; + d->fxState[2].offset = OFFB_ACC_5_r2; + d->fxState[3].offset = OFFB_ACC_5_r3; + break; + case 6: + d->fxState[0].offset = OFFB_ACC_6_r0; + d->fxState[1].offset = OFFB_ACC_6_r1; + d->fxState[2].offset = OFFB_ACC_6_r2; + d->fxState[3].offset = OFFB_ACC_6_r3; + break; + case 7: + d->fxState[0].offset = OFFB_ACC_7_r0; + d->fxState[1].offset = OFFB_ACC_7_r1; + d->fxState[2].offset = OFFB_ACC_7_r2; + d->fxState[3].offset = OFFB_ACC_7_r3; + break; + default: + vassert( (AT >= 0) && (AT < 8)); + } + return; +} +#define MATRIX_4BIT_INT_GER 1 +#define MATRIX_8BIT_INT_GER 2 +#define MATRIX_16BIT_INT_GER 3 +#define MATRIX_16BIT_FLOAT_GER 4 +#define MATRIX_32BIT_FLOAT_GER 5 +/* Note, the 64-bit float instructions have their own caller. */ + +static void vsx_matrix_ger ( const VexAbiInfo* vbi, + UInt inst_class, + IRExpr *srcA, IRExpr *srcB, + UInt AT, UInt mask_inst ) { + /* This function handles the 4-bit, 8-bit and 16-bit integer GER + instructions and the 16-bit and 32-bit floating-point GER + instructions, for example xvi4ger8, xvi4ger8pp, pmxvi4ger8 and + pmxvi4ger8pp. The instructions work on two V128 sources and, for the + prefixed forms, up to three masks. */ + + IRTemp srcA_hi = newTemp( Ity_I64); + IRTemp srcA_lo = newTemp( Ity_I64); + IRTemp srcB_hi = newTemp( Ity_I64); + IRTemp srcB_lo = newTemp( Ity_I64); + IRDirty* d; + UInt instruction = mask_inst & 0xFF; /* Instruction is lower 8-bits. */ + IREffect AT_fx; + + assign( srcA_hi, unop( Iop_V128HIto64, srcA ) ); + assign( srcA_lo, unop( Iop_V128to64, srcA ) ); + assign( srcB_hi, unop( Iop_V128HIto64, srcB ) ); + assign( srcB_lo, unop( Iop_V128to64, srcB ) ); + + /* Using a dirty helper so we can access the contents of the ACC for use + by the instruction and then write the result directly back to the ACC. + The dirty helper does not return data. */ + IRExpr** args = mkIRExprVec_7( + IRExpr_GSPTR(), + mkU32(offsetofPPCGuestState(guest_ACC_0_r0)), + mkexpr(srcA_hi), mkexpr(srcA_lo), + mkexpr(srcB_hi), mkexpr(srcB_lo), + mkU32( (mask_inst << 5) | AT )); + + /* Set AT_fx to Write if the instruction only writes the ACC. Set + AT_fx to Modify if the instruction uses the AT entry and writes + to the ACC entry.
*/ + switch (instruction) { + case XVI4GER8: + case XVI8GER4: + case XVI16GER2: + case XVI16GER2S: + case XVF16GER2: + case XVF32GER: + AT_fx = Ifx_Write; + break; + case XVI4GER8PP: + case XVI8GER4PP: + case XVI16GER2PP: + case XVI16GER2SPP: + case XVF16GER2PP: + case XVF16GER2PN: + case XVF16GER2NP: + case XVF16GER2NN: + case XVF32GERPP: + case XVF32GERPN: + case XVF32GERNP: + case XVF32GERNN: + AT_fx = Ifx_Modify; + break; + default: + vassert(0); /* Unknown instruction */ + } + + switch(inst_class) { + case MATRIX_4BIT_INT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_4bit_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_4bit_ger_dirty_helper ), + args ); + break; + + case MATRIX_8BIT_INT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_8bit_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_8bit_ger_dirty_helper ), + args ); + break; + + case MATRIX_16BIT_INT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_16bit_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_16bit_ger_dirty_helper ), + args ); + break; + + case MATRIX_16BIT_FLOAT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_16bit_float_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_16bit_float_ger_dirty_helper ), + args ); + break; + + case MATRIX_32BIT_FLOAT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_32bit_float_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_32bit_float_ger_dirty_helper ), + args ); + break; + + default: + vex_printf("ERROR: Unknown inst_class = %u in vsx_matrix_ger()\n", + inst_class); + return; + } + + setup_fxstate_struct( d, AT, AT_fx ); + + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); +} + +static void vsx_matrix_64bit_float_ger ( const VexAbiInfo* vbi, + IRExpr *srcA, IRExpr *srcA1, + IRExpr *srcB, + UInt AT, UInt mask_inst ) { + /* This helper function does the VSX Matrix 64-bit floating-point GER + (Rank-1 Update) instructions xvf64ger, xvf64gerpp, xvf64gerpn, + xvf64gernp, xvf64gernn, pmxvf64ger, pmxvf64gerpp, pmxvf64gerpn, + pmxvf64gernp, pmxvf64gernn. */ + IRTemp srcX_hi = newTemp( Ity_I64); + IRTemp srcX_lo = newTemp( Ity_I64); + IRTemp srcX1_hi = newTemp( Ity_I64); + IRTemp srcX1_lo = newTemp( Ity_I64); + IRTemp srcY_hi = newTemp( Ity_I64); + IRTemp srcY_lo = newTemp( Ity_I64); + UInt start_i; + IRDirty* d; + ULong combined_args; + UInt instruction = mask_inst & 0xFF; /* Instruction is lower 8-bits. */ + IREffect AT_fx; + + assign( srcX_lo, unop( Iop_V128HIto64, srcA ) ); + assign( srcX_hi, unop( Iop_V128to64, srcA ) ); + assign( srcX1_lo, unop( Iop_V128HIto64, srcA1 ) ); + assign( srcX1_hi, unop( Iop_V128to64, srcA1 ) ); + assign( srcY_lo, unop( Iop_V128HIto64, srcB ) ); + assign( srcY_hi, unop( Iop_V128to64, srcB ) ); + + /* Using a dirty helper so we can access the contents of the ACC for use + by the instruction and then write the result directly back to the ACC. + The dirty helper does not return data. + + There is a restriction of 8 args in a dirty helper. Can't pass the four + srcX values. So, just do two calls calculating the first two ACC + results and then the second two ACC results. */ + + start_i = 0; + combined_args = (mask_inst << 8) | (start_i << 4) | AT; + + IRExpr** args1 = mkIRExprVec_7( + IRExpr_GSPTR(), + mkU32( offsetofPPCGuestState(guest_ACC_0_r0) ), + mkexpr(srcX1_hi), mkexpr(srcX1_lo), + mkexpr(srcY_hi), mkexpr(srcY_lo), + mkU32( combined_args )); + + /* Set AT_fx to Write if the instruction only writes the ACC.
Set + AT_fx to Modify if the instruction uses the AT entry and writes + to the ACC entry. */ + switch (instruction) { + case XVF64GER: + AT_fx = Ifx_Write; + break; + case XVF64GERPP: + case XVF64GERPN: + case XVF64GERNP: + case XVF64GERNN: + AT_fx = Ifx_Modify; + break; + default: + vassert(0); /* Unknown instruction */ + } + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_64bit_float_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_64bit_float_ger_dirty_helper ), + args1 ); + + setup_fxstate_struct( d, AT, AT_fx ); + + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); + + start_i = 2; + combined_args = (mask_inst << 8) | (start_i << 4) | AT; + + IRExpr** args2 = mkIRExprVec_7( + IRExpr_GSPTR(), + mkU32( offsetofPPCGuestState(guest_ACC_0_r0) ), + mkexpr(srcX_hi), mkexpr(srcX_lo), + mkexpr(srcY_hi), mkexpr(srcY_lo), + mkU32( combined_args )); + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_64bit_float_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_64bit_float_ger_dirty_helper ), + args2 ); + + setup_fxstate_struct( d, AT, AT_fx ); + + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); +} + static IRExpr * UNSIGNED_CMP_GT_V128 ( IRExpr *vA, IRExpr *vB ) { /* This function does an unsigned compare of two V128 values. The * function is for use in 32-bit mode only as it is expensive. The @@ -11897,6 +12503,7 @@ static Bool dis_fp_load_prefix ( UInt prefix, UInt theInstr ) switch (opc1) { case 0x30: // lfs (Load Float Single, PPC32 p441) - pDIP( is_prefix, "lfs fr%u,%u(r%u)\n", frT_addr, immediate_val, rA_addr ); + pDIP( is_prefix, "lfs fr%u,%u(r%u)", frT_addr, immediate_val, rA_addr ); DIPp( is_prefix, ",%u", R ); putFReg( frT_addr, @@ -11904,7 +12511,7 @@ static Bool dis_fp_load_prefix ( UInt prefix, UInt theInstr ) break; case 0x32: // lfd (Load Float Double, PPC32 p437) - pDIP( is_prefix, "lfd fr%u,%u(r%u)\n", frT_addr, immediate_val, rA_addr ); + pDIP( is_prefix, "lfd fr%u,%u(r%u)", frT_addr, immediate_val, rA_addr ); DIPp( is_prefix, ",%u", R ); putFReg( frT_addr, load(Ity_F64, mkexpr(EA)) ); break; @@ -33236,6 +33843,450 @@ static Bool dis_test_LSB_by_bit ( UInt prefix, UInt theInstr ) #undef MAX_FIELDS } +static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, + const VexAbiInfo* vbi ) +{ + UChar opc1 = ifieldOPC(theInstr); + UInt opc2 = IFIELD( theInstr, 1, 10); + UInt bit11_15 = IFIELD( theInstr, (31-15), 5); + UChar AT = ifieldAT(theInstr); + Bool is_prefix = prefix_instruction( prefix ); + UChar rA_addr = ifieldRegA( theInstr ); + UChar rB_addr = ifieldRegB( theInstr ); + + /* Note, not all of the instructions supported by this function are + prefix instructions. */ + if ((opc1 == 0x3b) && !is_prefix) { + // Note these are not prefix instructions + UInt XO = IFIELD( theInstr, 3, 8); + UInt inst_prefix = 0; + + /* Note vsx_matrix_ger writes result to ACC register file.
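As a worked picture of what these cases compute: for each row i and column j enabled by xmsk and ymsk, the helpers form ACC[AT][i].word[j] = sum over k of ( srcA[i][k] * srcB[j][k] ), where k ranges over 8, 4 or 2 packed elements (hence rank-8, rank-4, rank-2), and the pp/pn/np/nn variants add the (possibly negated) product sum to the (possibly negated) old ACC value instead of overwriting it.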
*/ + switch ( XO ) { + case XVI4GER8: + DIP("xvi4ger8 %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVI4GER8PP: + DIP("xvi4ger8pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVI8GER4: + DIP("xvi8ger4 %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVI8GER4PP: + DIP("xvi8ger4pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVI16GER2S: + DIP("xvi16ger2s %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVI16GER2SPP: + DIP("xvi16ger2spp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2: + DIP("xvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2PP: + DIP("xvf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2PN: + DIP("xvf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2NP: + DIP("xvf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2NN: + DIP("xvf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GER: + DIP("xvf32ger %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERPP: + DIP("xvf32gerpp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERPN: + DIP("xvf32gerpn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERNP: + DIP("xvf32gernp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERNN: + DIP("xvf32gernn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF64GER: + DIP("xvf64ger %u,r%u, r%u\n", AT, rA_addr, rB_addr); +
vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF64GERPP: + DIP("xvf64gerpp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF64GERPN: + DIP("xvf64gerpn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF64GERNP: + DIP("xvf64gernp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + case XVF64GERNN: + DIP("xvf64gernn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; + default: + vex_printf("ERROR, dis_vsx_accumulator_prefix, Unknown XO = 0x%x value.\n", XO); + return False; + } + + } else if ((opc1 == 0x3b) && prefix) { + // Note these are prefix instructions + UInt XO = IFIELD( theInstr, 3, 8); + UInt PMSK, XMSK, YMSK, MASKS; + UInt inst_prefix = 0x1; + MASKS = IFIELD( prefix, 0, 16); + + switch ( XO ) { + case XVI4GER8: + PMSK = IFIELD( prefix, 8, 8); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + + DIP("pmxvi4ger8 %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO) ); + break; + case XVI4GER8PP: + PMSK = IFIELD( prefix, 8, 8); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi4ger8pp %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVI8GER4: + PMSK = IFIELD( prefix, 12, 4); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi8ger4 %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVI8GER4PP: + PMSK = IFIELD( prefix, 12, 4); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi8ger4pp %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVI16GER2S: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi16ger2s %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVI16GER2SPP: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvi16ger2spp %u,r%u, r%u,%u,%u,%u\n", + AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break;
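A note on the mask plumbing in these cases: the decoder packs the masks, the prefix flag and the XO into the single UInt it hands to vsx_matrix_ger(), which in turn passes ( mask_inst << 5 ) | AT to the dirty helpers. A sketch of the decode the helpers perform (local names here are illustrative, not from the sources):

   UInt acc    = masks_inst & 0xF;          /* bits [3:0]   ACC entry (AT)  */
   UInt xo     = (masks_inst >> 5) & 0xFF;  /* bits [12:5]  8-bit XO        */
   UInt prefix = (masks_inst >> 13) & 0x1;  /* bit  [13]    prefix flag     */
   UInt ymsk   = (masks_inst >> 14) & 0xF;  /* bits [17:14] YMSK            */
   UInt xmsk   = (masks_inst >> 18) & 0xF;  /* bits [21:18] XMSK            */
   UInt pmsk   = (masks_inst >> 22) & 0xFF; /* top bits; the PMSK width and
                                               shift vary with element size */

which matches the (masks_inst >> 5), (masks_inst >> 13) and mask extracts used in guest_ppc_helpers.c above.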
+ case XVF16GER2: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), + AT, ( (MASKS << 9 ) + | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2PP: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), + AT, ( (MASKS << 9 ) + | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2PN: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), + AT, ( (MASKS << 9 ) + | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2NP: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), + AT, ( (MASKS << 9 ) + | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF16GER2NN: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), + AT, ( (MASKS << 9 ) + | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GER: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf32ger %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERPP: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf32gerpp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERPN: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf32gerpn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERNP: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf32gernp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF32GERNN: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 0, 4); + DIP("pmxvf32gernn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, + getVSReg( rA_addr ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) ); + break; + case XVF64GER: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 2, 2); + DIP("pmxvf64ger %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( 
inst_prefix << 8 ) + | XO ) ); + break; + case XVF64GERPP: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 2, 2); + DIP("pmxvf64gerpp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) + | XO ) ); + break; + case XVF64GERPN: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 2, 2); + DIP("pmxvf64gerpn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) + | XO ) ); + break; + case XVF64GERNP: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 2, 2); + DIP("pmxvf64gernp %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) + | XO ) ); + break; + case XVF64GERNN: + PMSK = 0; + XMSK = IFIELD( prefix, 4, 4); + YMSK = IFIELD( prefix, 2, 2); + DIP("pmxvf64gernn %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ), + getVSReg( rA_addr+1 ), + getVSReg( rB_addr ), AT, + ( ( MASKS << 9) | ( inst_prefix << 8 ) + | XO ) ); + break; + default: + return False; + } + + } else if ((opc1 == 0x1F) && (opc2 == 0xB1) && (bit11_15 == 0) && !prefix) { + // FYI, this is not a prefix instruction + DIP("xxmfacc %u\n", AT); + + putVSReg( 4*AT+0, getACC( AT, 0 ) ); + putVSReg( 4*AT+1, getACC( AT, 1 ) ); + putVSReg( 4*AT+2, getACC( AT, 2 ) ); + putVSReg( 4*AT+3, getACC( AT, 3 ) ); + + } else if ((opc1 == 0x1F) && (opc2 == 0xB1) && (bit11_15 == 3) && !prefix) { + // FYI, this is not a prefix instruction + IRTemp zero128 = newTemp(Ity_V128); + + DIP("xxsetaccz %u\n", AT); + + assign( zero128, binop(Iop_64HLtoV128, mkU64( 0 ), mkU64( 0 ) ) ); + putACC( AT, 0, mkexpr( zero128 ) ); + putACC( AT, 1, mkexpr( zero128 ) ); + putACC( AT, 2, mkexpr( zero128 ) ); + putACC( AT, 3, mkexpr( zero128 ) ); + + } else if ((opc1 == 0x1F) && (opc2 == 0xB1) && (bit11_15 == 1) && !prefix) { + // FYI, this is not a prefix instruction + DIP("xxmtacc %u\n", AT); + + putACC( AT, 0, getVSReg( 4*AT+0 ) ); + putACC( AT, 1, getVSReg( 4*AT+1 ) ); + putACC( AT, 2, getVSReg( 4*AT+2 ) ); + putACC( AT, 3, getVSReg( 4*AT+3 ) ); + + } else { + vex_printf("ERROR, dis_vsx_accumulator_prefix, Unknown instruction theInstr = 0x%x\n", + theInstr); + return False; + } + + return True; +} + static Int dis_nop_prefix ( UInt prefix, UInt theInstr ) { Bool is_prefix = prefix_instruction( prefix ); @@ -33990,6 +35041,46 @@ DisResult disInstr_PPC_WRK ( goto decode_failure; default: + ; // Fall thru to the next check + } + + if ( !prefix_instruction( prefix ) ) { + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + opc2 = IFIELD( theInstr, 3, 8 ); + if ((opc2 == XVI4GER8) || // xvi4ger8 + (opc2 == XVI4GER8PP) || // xvi4ger8pp + (opc2 == XVI8GER4) || // xvi8ger4 + (opc2 == XVI8GER4PP) || // xvi8ger4pp + (opc2 == XVF16GER2) || // xvf16ger2 + (opc2 == XVF16GER2PP) || // xvf16ger2pp + (opc2 == XVF16GER2PN) || // xvf16ger2pn + (opc2 == XVF16GER2NP) || // xvf16ger2np + (opc2 == XVF16GER2NN) || // xvf16ger2nn + (opc2 == XVI16GER2S) || // xvi16ger2s + (opc2 == XVI16GER2SPP) || // xvi16ger2spp + (opc2 == XVF32GER) || // xvf32ger + (opc2 == XVF32GERPP) || // xvf32gerpp + (opc2 == XVF32GERPN) || // xvf32gerpn + (opc2 == XVF32GERNP) || // xvf32gernp + (opc2 == XVF32GERNN) || // xvf32gernn 
+ (opc2 == XVF64GER) || // xvf64ger + (opc2 == XVF64GERPP) || // xvf64gerpp + (opc2 == XVF64GERPN) || // xvf64gerpn + (opc2 == XVF64GERNP) || // xvf64gernp + (opc2 == XVF64GERNN)) { // xvf64gernn + if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) ) + goto decode_success; + goto decode_failure; + } else { + vex_printf("ERROR, dis_vsx_accumulator_prefix, unknown opc2 = 0x%x\n", + opc2); + goto decode_failure; + } + + } else { + // lxacc + if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) ) + goto decode_success; goto decode_failure; } break; @@ -34242,6 +35333,13 @@ DisResult disInstr_PPC_WRK ( goto decode_failure; case 0x3F: + if ( prefix_instruction( prefix ) ) { // stxacc + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) ) + goto decode_success; + goto decode_failure; + } + if (!allow_F) goto decode_noF; /* Instrs using opc[1:5] never overlap instrs using opc[1:10], so we can simply fall through the first switch statement */ @@ -34545,8 +35643,13 @@ DisResult disInstr_PPC_WRK ( } break; - case 0x1F: + if ( prefix_instruction( prefix ) ) { // stxacc + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) ) + goto decode_success; + goto decode_failure; + } /* For arith instns, bit10 is the OE flag (overflow enable) */ @@ -34608,6 +35711,13 @@ DisResult disInstr_PPC_WRK ( opc2 = IFIELD(theInstr, 1, 10); switch (opc2) { + case 0xB1: // xxmfacc, xxsetaccz + { + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) ) + goto decode_success; + goto decode_failure; + } case 0xDB: // brh case 0x9B: // brw @@ -34883,7 +35993,7 @@ DisResult disInstr_PPC_WRK ( // if allow_V is not set, we'll skip trying to decode. if (!allow_V) goto decode_noV; - if (dis_vx_load( prefix, theInstr )) goto decode_success; + if (dis_vx_load( prefix, theInstr )) goto decode_success; goto decode_failure; case 0x00D: // lxvrbx @@ -35404,7 +36514,8 @@ DisResult disInstr_PPC_WRK ( case 0x0C6: case 0x1C6: case 0x2C6: // vcmpeqfp, vcmpgefp, vcmpgtfp case 0x3C6: // vcmpbfp if (!allow_V) goto decode_noV; - if (dis_av_fp_cmp( prefix, theInstr )) goto decode_success; + if (dis_av_fp_cmp( prefix, theInstr )) + goto decode_success; goto decode_failure; default: diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c index 9c79b6fa13..3c76feb2d5 100644 --- a/VEX/priv/host_ppc_isel.c +++ b/VEX/priv/host_ppc_isel.c @@ -1,4 +1,5 @@ + /*---------------------------------------------------------------*/ /*--- begin host_ppc_isel.c ---*/ /*---------------------------------------------------------------*/ @@ -638,6 +639,10 @@ PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, Int nElems = descr->nElems; Int shift = 0; + /* MAX is somewhat arbitrary; it needs to be at least + 3 times the size of VexGuestPPC64State. */ +#define MAX 6500 + /* Throw out any cases we don't need. In theory there might be a day where we need to handle others, but not today. */
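The new MAX deserves a quick arithmetic note: the accumulators added by this patch grow the guest state by 8 accumulators x 4 rows x 16 bytes = 512 bytes, and the bound has to track a multiple of the state size (presumably because genGuestArrayOffset can also see offsets into the shadow copies of the guest state, which is an assumption suggested by the "3 times" comment above), so the old cap of 5000 no longer leaves room. A trivial check of the numbers:

   #include <stdio.h>

   int main ( void )
   {
      /* 8 accumulators x 4 rows x 16 bytes, per the headers below */
      unsigned acc_block = 8u * 4u * 16u;   /* 512 bytes            */
      /* the last ACC row starts at offset 2208 in the 64-bit guest
         state (see libvex_guest_ppc64.h below) and is 16 bytes     */
      unsigned acc_end   = 2208u + 16u;     /* ACC area ends at 2224 */
      printf( "ACC block = %u bytes, ACC area ends at offset %u\n",
              acc_block, acc_end );
      return 0;
   }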
@@ -652,8 +657,11 @@ PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, if (bias < -100 || bias > 100) /* somewhat arbitrarily */ vpanic("genGuestArrayOffset(ppc host)(3)"); - if (descr->base < 0 || descr->base > 5000) /* somewhat arbitrarily */ + if (descr->base < 0 || descr->base > MAX) { /* somewhat arbitrarily */ + vex_printf("ERROR: descr->base = %d is greater than maximum = %d\n", + descr->base, MAX); vpanic("genGuestArrayOffset(ppc host)(4)"); + } /* Compute off into a reg, %off. Then return: @@ -684,6 +692,7 @@ PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, PPCRH_Imm(True/*signed*/, toUShort(descr->base)))); return PPCAMode_RR( GuestStatePtr(env->mode64), rtmp ); +#undef MAX } diff --git a/VEX/pub/libvex_guest_ppc32.h b/VEX/pub/libvex_guest_ppc32.h index 8e7fbaa4b9..eb1f728b74 100644 --- a/VEX/pub/libvex_guest_ppc32.h +++ b/VEX/pub/libvex_guest_ppc32.h @@ -251,9 +251,48 @@ typedef /* 1396 */ UInt guest_TEXASRU; // Transaction EXception And Summary Register Upper /* 1400 */ UInt guest_PSPB; // Problem State Priority Boost register /* 1404 */ ULong guest_DSCR; // Data Stream Control register - /* Padding to make it have an 16-aligned size */ - /* 1408 */ UInt padding3; - /* 1412 */ UInt padding4; + + /* ISA 3.1 Accumulators. There are eight accumulators, each containing + four 128-bit rows. Declare each accumulator and row explicitly, then + use a helper to map ACC[i][row] to the explicitly declared entry. + Technically not supported in 32-bit mode, but the entries need to be + defined. */ + /* 1412 */ U128 guest_ACC_0_r0; + /* 1428 */ U128 guest_ACC_0_r1; + /* 1444 */ U128 guest_ACC_0_r2; + /* 1460 */ U128 guest_ACC_0_r3; + /* 1476 */ U128 guest_ACC_1_r0; + /* 1492 */ U128 guest_ACC_1_r1; + /* 1508 */ U128 guest_ACC_1_r2; + /* 1524 */ U128 guest_ACC_1_r3; + /* 1540 */ U128 guest_ACC_2_r0; + /* 1556 */ U128 guest_ACC_2_r1; + /* 1572 */ U128 guest_ACC_2_r2; + /* 1588 */ U128 guest_ACC_2_r3; + /* 1604 */ U128 guest_ACC_3_r0; + /* 1620 */ U128 guest_ACC_3_r1; + /* 1636 */ U128 guest_ACC_3_r2; + /* 1652 */ U128 guest_ACC_3_r3; + /* 1668 */ U128 guest_ACC_4_r0; + /* 1684 */ U128 guest_ACC_4_r1; + /* 1700 */ U128 guest_ACC_4_r2; + /* 1716 */ U128 guest_ACC_4_r3; + /* 1732 */ U128 guest_ACC_5_r0; + /* 1748 */ U128 guest_ACC_5_r1; + /* 1764 */ U128 guest_ACC_5_r2; + /* 1780 */ U128 guest_ACC_5_r3; + /* 1796 */ U128 guest_ACC_6_r0; + /* 1812 */ U128 guest_ACC_6_r1; + /* 1828 */ U128 guest_ACC_6_r2; + /* 1844 */ U128 guest_ACC_6_r3; + /* 1860 */ U128 guest_ACC_7_r0; + /* 1876 */ U128 guest_ACC_7_r1; + /* 1892 */ U128 guest_ACC_7_r2; + /* 1908 */ U128 guest_ACC_7_r3; + + /* Padding to make it have a 16-aligned size */ + /* 1924 */ UInt padding2; +// /* 1928 */ UInt padding3; +// /* 1932 */ UInt padding4; } VexGuestPPC32State; diff --git a/VEX/pub/libvex_guest_ppc64.h b/VEX/pub/libvex_guest_ppc64.h index 278bfe8384..c8c9d072d4 100644 --- a/VEX/pub/libvex_guest_ppc64.h +++ b/VEX/pub/libvex_guest_ppc64.h @@ -278,25 +278,58 @@ typedef /* Needed for Darwin: CIA at the last SC insn. Used when backing up to restart a syscall that has been interrupted by a signal. */ - /* 1646 */ ULong guest_IP_AT_SYSCALL; + /* 1648 */ ULong guest_IP_AT_SYSCALL; /* SPRG3, which AIUI is readonly in user space. Needed for threading on AIX.
*/ - /* 1654 */ ULong guest_SPRG3_RO; - - /* 1662 */ ULong guest_TFHAR; // Transaction Failure Handler Address Register - /* 1670 */ ULong guest_TEXASR; // Transaction EXception And Summary Register - /* 1678 */ ULong guest_TFIAR; // Transaction Failure Instruction Address Register - /* 1686 */ ULong guest_PPR; // Program Priority register - /* 1694 */ UInt guest_TEXASRU; // Transaction EXception And Summary Register Upper - /* 1698 */ UInt guest_PSPB; // Problem State Priority Boost register - /* 1702 */ ULong guest_DSCR; // Data Stream Control register + /* 1656 */ ULong guest_SPRG3_RO; + + /* 1664 */ ULong guest_TFHAR; // Transaction Failure Handler Address Register + /* 1672 */ ULong guest_TEXASR; // Transaction EXception And Summary Register + /* 1680 */ ULong guest_TFIAR; // Transaction Failure Instruction Address Register + /* 1688 */ ULong guest_PPR; // Program Priority register + /* 1696 */ UInt guest_TEXASRU; // Transaction EXception And Summary Register Upper + /* 1700 */ UInt guest_PSPB; // Problem State Priority Boost register + /* 1704 */ ULong guest_DSCR; // Data Stream Control register + + /* The guest_ACC entries must be in order and sequential. The helper + routines get_ACC_entry() and write_ACC_entry() calculate the offset + of an ACC entry from the address of guest_ACC_0_r0. */ + /* 1712 */ U128 guest_ACC_0_r0; + /* 1728 */ U128 guest_ACC_0_r1; + /* 1744 */ U128 guest_ACC_0_r2; + /* 1760 */ U128 guest_ACC_0_r3; + /* 1776 */ U128 guest_ACC_1_r0; + /* 1792 */ U128 guest_ACC_1_r1; + /* 1808 */ U128 guest_ACC_1_r2; + /* 1824 */ U128 guest_ACC_1_r3; + /* 1840 */ U128 guest_ACC_2_r0; + /* 1856 */ U128 guest_ACC_2_r1; + /* 1872 */ U128 guest_ACC_2_r2; + /* 1888 */ U128 guest_ACC_2_r3; + /* 1904 */ U128 guest_ACC_3_r0; + /* 1920 */ U128 guest_ACC_3_r1; + /* 1936 */ U128 guest_ACC_3_r2; + /* 1952 */ U128 guest_ACC_3_r3; + /* 1968 */ U128 guest_ACC_4_r0; + /* 1984 */ U128 guest_ACC_4_r1; + /* 2000 */ U128 guest_ACC_4_r2; + /* 2016 */ U128 guest_ACC_4_r3; + /* 2032 */ U128 guest_ACC_5_r0; + /* 2048 */ U128 guest_ACC_5_r1; + /* 2064 */ U128 guest_ACC_5_r2; + /* 2080 */ U128 guest_ACC_5_r3; + /* 2096 */ U128 guest_ACC_6_r0; + /* 2112 */ U128 guest_ACC_6_r1; + /* 2128 */ U128 guest_ACC_6_r2; + /* 2144 */ U128 guest_ACC_6_r3; + /* 2160 */ U128 guest_ACC_7_r0; + /* 2176 */ U128 guest_ACC_7_r1; + /* 2192 */ U128 guest_ACC_7_r2; + /* 2208 */ U128 guest_ACC_7_r3; /* Padding to make it have an 16-aligned size */ - /* 1710 */ UInt padding1; - /* 1714 */ UInt padding2; - /* 1718 */ UInt padding3; - + /* 2224 UInt padding0; */ } VexGuestPPC64State; diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c index 3afc9911a4..436ee46b4f 100644 --- a/memcheck/mc_main.c +++ b/memcheck/mc_main.c @@ -4523,7 +4523,7 @@ static UInt mb_get_origin_for_guest_offset ( ThreadId tid, static void mc_post_reg_write ( CorePart part, ThreadId tid, PtrdiffT offset, SizeT size) { -# define MAX_REG_WRITE_SIZE 1744 +# define MAX_REG_WRITE_SIZE 2264 UChar area[MAX_REG_WRITE_SIZE]; tl_assert(size <= MAX_REG_WRITE_SIZE); VG_(memset)(area, V_BITS8_DEFINED, size);
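Taken together, the accumulator plumbing above reduces to three pieces of arithmetic: row k of accumulator AT overlays VSR 4*AT+k (the xxmfacc/xxmtacc cases), the 32 rows sit sequentially in the guest state starting at guest_ACC_0_r0 (offset 1712 in the 64-bit header), and the last row ends at offset 2224, inside the enlarged MAX_REG_WRITE_SIZE. A self-contained sketch tying these together; the function names here are illustrative, not the VEX API:

   #include <assert.h>
   #include <stdio.h>

   typedef unsigned int U128[4];        /* 16 bytes, as in VEX */

   enum { ACC_BASE = 1712 };            /* guest_ACC_0_r0 (ppc64) */

   /* Row `row` of accumulator `acc` overlays VSR 4*acc + row,
      which is the mapping the xxmfacc/xxmtacc cases implement. */
   static unsigned acc_row_to_vsr ( unsigned acc, unsigned row )
   {
      assert( acc < 8 && row < 4 );
      return 4*acc + row;
   }

   /* Guest-state offset of an ACC entry.  This relies on the rows
      being declared in order and sequentially, which is the same
      property get_ACC_entry()/write_ACC_entry() depend on. */
   static unsigned acc_entry_offset ( unsigned acc, unsigned row )
   {
      assert( acc < 8 && row < 4 );
      return ACC_BASE + (4*acc + row) * (unsigned)sizeof(U128);
   }

   int main ( void )
   {
      /* ACC 7 row 3 is VSR 31 at offset 2208; it ends at 2224,
         which must fit in memcheck's enlarged scratch buffer. */
      unsigned end = acc_entry_offset( 7, 3 ) + (unsigned)sizeof(U128);
      printf( "ACC 7 row 3 -> VSR %u, offset %u, end %u\n",
              acc_row_to_vsr( 7, 3 ), acc_entry_offset( 7, 3 ), end );
      assert( end <= 2264 );            /* MAX_REG_WRITE_SIZE */
      return 0;
   }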